-
Notifications
You must be signed in to change notification settings - Fork 474
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Feature] add lveval benchmark #914
Merged
Mor-Li
merged 5 commits into
open-compass:main
from
yuantao2108:lveval_to_merge_clean_commits
Mar 4, 2024
Merged
Changes from 1 commit
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lvevalcmrc_mixup.lveval_cmrc_mixup_gen import ( | ||
LVEval_cmrc_mixup_datasets, | ||
) | ||
from .lvevaldureader_mixup.lveval_dureader_mixup_gen import ( | ||
LVEval_dureader_mixup_datasets, | ||
) | ||
from .lvevalfactrecall_en.lveval_factrecall_en_gen import ( | ||
LVEval_factrecall_en_datasets, | ||
) | ||
from .lvevalfactrecall_zh.lveval_factrecall_zh_gen import ( | ||
LVEval_factrecall_zh_datasets, | ||
) | ||
from .lvevalhotpotwikiqa_mixup.lveval_hotpotwikiqa_mixup_gen import ( | ||
LVEval_hotpotwikiqa_mixup_datasets, | ||
) | ||
from .lvevallic_mixup.lveval_lic_mixup_gen import LVEval_lic_mixup_datasets | ||
from .lvevalloogle_CR_mixup.lveval_loogle_CR_mixup_gen import ( | ||
LVEval_loogle_CR_mixup_datasets, | ||
) | ||
from .lvevalloogle_MIR_mixup.lveval_loogle_MIR_mixup_gen import ( | ||
LVEval_loogle_MIR_mixup_datasets, | ||
) | ||
from .lvevalloogle_SD_mixup.lveval_loogle_SD_mixup_gen import ( | ||
LVEval_loogle_SD_mixup_datasets, | ||
) | ||
from .lvevalmultifieldqa_en_mixup.lveval_multifieldqa_en_mixup_gen import ( | ||
LVEval_multifieldqa_en_mixup_datasets, | ||
) | ||
from .lvevalmultifieldqa_zh_mixup.lveval_multifieldqa_zh_mixup_gen import ( | ||
LVEval_multifieldqa_zh_mixup_datasets, | ||
) | ||
|
||
LVEval_datasets = sum( | ||
(v for k, v in locals().items() if k.endswith("_datasets")), [] | ||
) |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_cmrc_mixup_gen_465823 import ( | ||
LVEval_cmrc_mixup_datasets, | ||
) # noqa: F401, F403 |
54 changes: 54 additions & 0 deletions
54
configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen_465823.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import LVEvalOPTF1Evaluator, LVEvalcmrcDataset | ||
|
||
LVEval_cmrc_mixup_reader_cfg = dict( | ||
input_columns=["context", "input"], | ||
output_column="answers", | ||
train_split="test", | ||
test_split="test", | ||
) | ||
|
||
LVEval_cmrc_mixup_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt="请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:", | ||
), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=64), | ||
) | ||
|
||
LVEval_cmrc_mixup_eval_cfg = dict( | ||
evaluator=dict(type=LVEvalOPTF1Evaluator, language="zh"), pred_role="BOT" | ||
) | ||
|
||
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"] | ||
|
||
|
||
def get_dataset_names(dataset_name, length_levels):
    """Build one dataset name per length level.

    Args:
        dataset_name: Base name placed before the underscore.
        length_levels: Iterable of length labels appended after the
            underscore.

    Returns:
        A new list of ``"{dataset_name}_{length}"`` strings, in the same
        order as ``length_levels``.
    """
    return [f"{dataset_name}_{length}" for length in length_levels]
|
||
|
||
LVEval_cmrc_mixup_datasets = [ | ||
dict( | ||
type=LVEvalcmrcDataset, | ||
abbr="LVEval_" + name_len, | ||
path="Infinigence/LVEval", | ||
name=name_len, | ||
reader_cfg=LVEval_cmrc_mixup_reader_cfg, | ||
infer_cfg=LVEval_cmrc_mixup_infer_cfg, | ||
eval_cfg=LVEval_cmrc_mixup_eval_cfg, | ||
) | ||
for name_len in get_dataset_names("cmrc_mixup", DATASET_LENGTH_LEVEL) | ||
] |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_dureader_mixup_gen_465823 import ( | ||
LVEval_dureader_mixup_datasets, | ||
) # noqa: F401, F403 |
55 changes: 55 additions & 0 deletions
55
configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen_465823.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import LVEvalOPTRougeEvaluator, LVEvaldureaderDataset | ||
|
||
LVEval_dureader_mixup_reader_cfg = dict( | ||
input_columns=["context", "input"], | ||
output_column="answers", | ||
train_split="test", | ||
test_split="test", | ||
) | ||
|
||
LVEval_dureader_mixup_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt="请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:", | ||
), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=64), | ||
) | ||
|
||
LVEval_dureader_mixup_eval_cfg = dict( | ||
evaluator=dict(type=LVEvalOPTRougeEvaluator, language="zh"), | ||
pred_role="BOT", | ||
) | ||
|
||
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"] | ||
|
||
|
||
def get_dataset_names(dataset_name, length_levels):
    """Build one dataset name per length level.

    Args:
        dataset_name: Base name placed before the underscore.
        length_levels: Iterable of length labels appended after the
            underscore.

    Returns:
        A new list of ``"{dataset_name}_{length}"`` strings, in the same
        order as ``length_levels``.
    """
    return [f"{dataset_name}_{length}" for length in length_levels]
|
||
|
||
LVEval_dureader_mixup_datasets = [ | ||
dict( | ||
type=LVEvaldureaderDataset, | ||
abbr="LVEval_" + name_len, | ||
path="Infinigence/LVEval", | ||
name=name_len, | ||
reader_cfg=LVEval_dureader_mixup_reader_cfg, | ||
infer_cfg=LVEval_dureader_mixup_infer_cfg, | ||
eval_cfg=LVEval_dureader_mixup_eval_cfg, | ||
) | ||
for name_len in get_dataset_names("dureader_mixup", DATASET_LENGTH_LEVEL) | ||
] |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_factrecall_en_gen_9a836f import ( | ||
LVEval_factrecall_en_datasets, | ||
) # noqa: F401, F403 |
54 changes: 54 additions & 0 deletions
54
configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen_9a836f.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallenDataset | ||
|
||
LVEval_factrecall_en_reader_cfg = dict( | ||
input_columns=["context", "input"], | ||
output_column="answers", | ||
train_split="test", | ||
test_split="test", | ||
) | ||
|
||
LVEval_factrecall_en_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt="Please answer the following questions based on the given article.\n\nArticle: {context}\n\nPlease answer the following questions based on the above article.\n\nQuestion: {input}\nAnswer:", | ||
), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=16), | ||
) | ||
|
||
LVEval_factrecall_en_eval_cfg = dict( | ||
evaluator=dict(type=LVEvalF1Evaluator, language="en"), pred_role="BOT" | ||
) | ||
|
||
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"] | ||
|
||
|
||
def get_dataset_names(dataset_name, length_levels):
    """Build one dataset name per length level.

    Args:
        dataset_name: Base name placed before the underscore.
        length_levels: Iterable of length labels appended after the
            underscore.

    Returns:
        A new list of ``"{dataset_name}_{length}"`` strings, in the same
        order as ``length_levels``.
    """
    return [f"{dataset_name}_{length}" for length in length_levels]
|
||
|
||
LVEval_factrecall_en_datasets = [ | ||
dict( | ||
type=LVEvalfactrecallenDataset, | ||
abbr="LVEval_" + name_len, | ||
path="Infinigence/LVEval", | ||
name=name_len, | ||
reader_cfg=LVEval_factrecall_en_reader_cfg, | ||
infer_cfg=LVEval_factrecall_en_infer_cfg, | ||
eval_cfg=LVEval_factrecall_en_eval_cfg, | ||
) | ||
for name_len in get_dataset_names("factrecall_en", DATASET_LENGTH_LEVEL) | ||
] |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_factrecall_zh_gen_dbee70 import ( | ||
LVEval_factrecall_zh_datasets, | ||
) # noqa: F401, F403 |
54 changes: 54 additions & 0 deletions
54
configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen_dbee70.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallzhDataset | ||
|
||
LVEval_factrecall_zh_reader_cfg = dict( | ||
input_columns=["context", "input"], | ||
output_column="answers", | ||
train_split="test", | ||
test_split="test", | ||
) | ||
|
||
LVEval_factrecall_zh_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt="请基于给定的文章回答下述问题。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:", | ||
), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=16), | ||
) | ||
|
||
LVEval_factrecall_zh_eval_cfg = dict( | ||
evaluator=dict(type=LVEvalF1Evaluator, language="zh"), pred_role="BOT" | ||
) | ||
|
||
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"] | ||
|
||
|
||
def get_dataset_names(dataset_name, length_levels):
    """Build one dataset name per length level.

    Args:
        dataset_name: Base name placed before the underscore.
        length_levels: Iterable of length labels appended after the
            underscore.

    Returns:
        A new list of ``"{dataset_name}_{length}"`` strings, in the same
        order as ``length_levels``.
    """
    return [f"{dataset_name}_{length}" for length in length_levels]
|
||
|
||
LVEval_factrecall_zh_datasets = [ | ||
dict( | ||
type=LVEvalfactrecallzhDataset, | ||
abbr="LVEval_" + name_len, | ||
path="Infinigence/LVEval", | ||
name=name_len, | ||
reader_cfg=LVEval_factrecall_zh_reader_cfg, | ||
infer_cfg=LVEval_factrecall_zh_infer_cfg, | ||
eval_cfg=LVEval_factrecall_zh_eval_cfg, | ||
) | ||
for name_len in get_dataset_names("factrecall_zh", DATASET_LENGTH_LEVEL) | ||
] |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_hotpotwikiqa_mixup_gen_77ce82 import ( | ||
LVEval_hotpotwikiqa_mixup_datasets, | ||
) # noqa: F401, F403 |
59 changes: 59 additions & 0 deletions
59
configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen_77ce82.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from opencompass.openicl.icl_prompt_template import PromptTemplate | ||
from opencompass.openicl.icl_retriever import ZeroRetriever | ||
from opencompass.openicl.icl_inferencer import GenInferencer | ||
from opencompass.datasets import ( | ||
LVEvalOPTF1Evaluator, | ||
LVEvalhotpotwikiqaDataset, | ||
) | ||
|
||
LVEval_hotpotwikiqa_mixup_reader_cfg = dict( | ||
input_columns=["context", "input"], | ||
output_column="answers", | ||
train_split="test", | ||
test_split="test", | ||
) | ||
|
||
LVEval_hotpotwikiqa_mixup_infer_cfg = dict( | ||
prompt_template=dict( | ||
type=PromptTemplate, | ||
template=dict( | ||
round=[ | ||
dict( | ||
role="HUMAN", | ||
prompt="Answer the question based on the given passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nArticle: {context}\n\nPlease answer the following question based on the above passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nQuestion: {input}\nAnswer:", | ||
), | ||
], | ||
), | ||
), | ||
retriever=dict(type=ZeroRetriever), | ||
inferencer=dict(type=GenInferencer, max_out_len=64), | ||
) | ||
|
||
LVEval_hotpotwikiqa_mixup_eval_cfg = dict( | ||
evaluator=dict(type=LVEvalOPTF1Evaluator, language="en"), pred_role="BOT" | ||
) | ||
|
||
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"] | ||
|
||
|
||
def get_dataset_names(dataset_name, length_levels):
    """Build one dataset name per length level.

    Args:
        dataset_name: Base name placed before the underscore.
        length_levels: Iterable of length labels appended after the
            underscore.

    Returns:
        A new list of ``"{dataset_name}_{length}"`` strings, in the same
        order as ``length_levels``.
    """
    return [f"{dataset_name}_{length}" for length in length_levels]
|
||
|
||
LVEval_hotpotwikiqa_mixup_datasets = [ | ||
dict( | ||
type=LVEvalhotpotwikiqaDataset, | ||
abbr="LVEval_" + name_len, | ||
path="Infinigence/LVEval", | ||
name=name_len, | ||
reader_cfg=LVEval_hotpotwikiqa_mixup_reader_cfg, | ||
infer_cfg=LVEval_hotpotwikiqa_mixup_infer_cfg, | ||
eval_cfg=LVEval_hotpotwikiqa_mixup_eval_cfg, | ||
) | ||
for name_len in get_dataset_names( | ||
"hotpotwikiqa_mixup", DATASET_LENGTH_LEVEL | ||
) | ||
] |
6 changes: 6 additions & 0 deletions
6
configs/datasets/lveval/lvevallic_mixup/lveval_lic_mixup_gen.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from mmengine.config import read_base | ||
|
||
with read_base(): | ||
from .lveval_lic_mixup_gen_01eb0c import ( | ||
LVEval_lic_mixup_datasets, | ||
) # noqa: F401, F403 |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for the PR. I noticed the dataset path (Infinigence/LVEval) isn't relative to our project's root. Could you please update it to a relative path, like ./data/LVEval, for better consistency and accessibility?
Also, I can't access the dataset at this path. Could you provide a sample or mock data for testing purposes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a Hugging Face dataset path, which is meant to be loaded directly by the Hugging Face `datasets` library. Could you access or download the data from this URL: https://huggingface.co/datasets/Infinigence/LVEval ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the misunderstanding regarding your data path. My mistake, and I appreciate your patience.