Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Feature] add lveval benchmark #914

Merged
merged 5 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions configs/datasets/lveval/lveval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from mmengine.config import read_base

# Aggregate all LVEval sub-benchmark dataset configs into one list.
# `read_base` executes the relative imports as config inheritance, so each
# `LVEval_*_datasets` name below ends up bound in this module's namespace.
with read_base():
    from .lvevalcmrc_mixup.lveval_cmrc_mixup_gen import (
        LVEval_cmrc_mixup_datasets,
    )
    from .lvevaldureader_mixup.lveval_dureader_mixup_gen import (
        LVEval_dureader_mixup_datasets,
    )
    from .lvevalfactrecall_en.lveval_factrecall_en_gen import (
        LVEval_factrecall_en_datasets,
    )
    from .lvevalfactrecall_zh.lveval_factrecall_zh_gen import (
        LVEval_factrecall_zh_datasets,
    )
    from .lvevalhotpotwikiqa_mixup.lveval_hotpotwikiqa_mixup_gen import (
        LVEval_hotpotwikiqa_mixup_datasets,
    )
    from .lvevallic_mixup.lveval_lic_mixup_gen import LVEval_lic_mixup_datasets
    from .lvevalloogle_CR_mixup.lveval_loogle_CR_mixup_gen import (
        LVEval_loogle_CR_mixup_datasets,
    )
    from .lvevalloogle_MIR_mixup.lveval_loogle_MIR_mixup_gen import (
        LVEval_loogle_MIR_mixup_datasets,
    )
    from .lvevalloogle_SD_mixup.lveval_loogle_SD_mixup_gen import (
        LVEval_loogle_SD_mixup_datasets,
    )
    from .lvevalmultifieldqa_en_mixup.lveval_multifieldqa_en_mixup_gen import (
        LVEval_multifieldqa_en_mixup_datasets,
    )
    from .lvevalmultifieldqa_zh_mixup.lveval_multifieldqa_zh_mixup_gen import (
        LVEval_multifieldqa_zh_mixup_datasets,
    )

# Concatenate every imported `*_datasets` list into a single flat list.
# Scanning `locals()` keeps this robust to adding/removing sub-benchmarks
# above without having to touch this expression.
LVEval_datasets = sum(
    (v for k, v in locals().items() if k.endswith("_datasets")), []
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_cmrc_mixup_gen_465823 import (
        LVEval_cmrc_mixup_datasets,
    )  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LVEvalOPTF1Evaluator, LVEvalcmrcDataset

# Reader settings: feed the long context plus the question, expect the
# gold answers; LVEval ships only a test split, so both splits point at it.
LVEval_cmrc_mixup_reader_cfg = {
    "input_columns": ["context", "input"],
    "output_column": "answers",
    "train_split": "test",
    "test_split": "test",
}

# Inference: zero-shot (ZeroRetriever) free-form generation from a
# single-turn Chinese QA prompt, capped at 64 generated tokens.
LVEval_cmrc_mixup_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:",
                },
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer, "max_out_len": 64},
}

# Evaluation: LVEval's optimized F1 metric for Chinese; the model (BOT)
# turn is the prediction to score.
LVEval_cmrc_mixup_eval_cfg = {
    "evaluator": {"type": LVEvalOPTF1Evaluator, "language": "zh"},
    "pred_role": "BOT",
}

# Context-length buckets published by the LVEval benchmark.
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"]


def get_dataset_names(dataset_name, length_levels):
    """Return ``"{dataset_name}_{length}"`` names, one per length level.

    Args:
        dataset_name (str): Base dataset name, e.g. ``"cmrc_mixup"``.
        length_levels (list[str]): Length suffixes such as ``"16k"``.

    Returns:
        list[str]: Names in the same order as ``length_levels``.
    """
    # Comprehension replaces the manual append loop (same order, same items).
    return [f"{dataset_name}_{length}" for length in length_levels]


# One dataset entry per context-length bucket (16k ... 256k). The span was
# corrupted by interleaved review-comment text from the PR page; this is
# the reconstructed code, mirroring the sibling LVEval configs.
LVEval_cmrc_mixup_datasets = [
    dict(
        type=LVEvalcmrcDataset,
        abbr="LVEval_" + name_len,
        # Hugging Face Hub dataset id, loaded via the `datasets` library
        # (confirmed in the PR discussion; intentionally not a local path).
        path="Infinigence/LVEval",
        name=name_len,
        reader_cfg=LVEval_cmrc_mixup_reader_cfg,
        infer_cfg=LVEval_cmrc_mixup_infer_cfg,
        eval_cfg=LVEval_cmrc_mixup_eval_cfg,
    )
    for name_len in get_dataset_names("cmrc_mixup", DATASET_LENGTH_LEVEL)
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_dureader_mixup_gen_465823 import (
        LVEval_dureader_mixup_datasets,
    )  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LVEvalOPTRougeEvaluator, LVEvaldureaderDataset

# Reader settings: feed the long context plus the question, expect the
# gold answers; LVEval ships only a test split, so both splits point at it.
LVEval_dureader_mixup_reader_cfg = {
    "input_columns": ["context", "input"],
    "output_column": "answers",
    "train_split": "test",
    "test_split": "test",
}

# Inference: zero-shot (ZeroRetriever) free-form generation from a
# single-turn Chinese QA prompt, capped at 64 generated tokens.
LVEval_dureader_mixup_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:",
                },
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer, "max_out_len": 64},
}

# Evaluation: LVEval's optimized ROUGE metric for Chinese; the model (BOT)
# turn is the prediction to score.
LVEval_dureader_mixup_eval_cfg = {
    "evaluator": {"type": LVEvalOPTRougeEvaluator, "language": "zh"},
    "pred_role": "BOT",
}

# Context-length buckets published by the LVEval benchmark.
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"]


def get_dataset_names(dataset_name, length_levels):
    """Return ``"{dataset_name}_{length}"`` names, one per length level.

    Args:
        dataset_name (str): Base dataset name, e.g. ``"dureader_mixup"``.
        length_levels (list[str]): Length suffixes such as ``"16k"``.

    Returns:
        list[str]: Names in the same order as ``length_levels``.
    """
    # Comprehension replaces the manual append loop (same order, same items).
    return [f"{dataset_name}_{length}" for length in length_levels]


# One dataset entry per context-length bucket (16k ... 256k); the length
# suffix is folded into both the abbr and the HF config name.
LVEval_dureader_mixup_datasets = [
    dict(
        type=LVEvaldureaderDataset,
        abbr=f"LVEval_dureader_mixup_{length}",
        path="Infinigence/LVEval",
        name=f"dureader_mixup_{length}",
        reader_cfg=LVEval_dureader_mixup_reader_cfg,
        infer_cfg=LVEval_dureader_mixup_infer_cfg,
        eval_cfg=LVEval_dureader_mixup_eval_cfg,
    )
    for length in DATASET_LENGTH_LEVEL
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_factrecall_en_gen_9a836f import (
        LVEval_factrecall_en_datasets,
    )  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallenDataset

# Reader settings: feed the long context plus the question, expect the
# gold answers; LVEval ships only a test split, so both splits point at it.
LVEval_factrecall_en_reader_cfg = {
    "input_columns": ["context", "input"],
    "output_column": "answers",
    "train_split": "test",
    "test_split": "test",
}

# Inference: zero-shot (ZeroRetriever) generation from a single-turn
# English QA prompt; fact recall needs only short answers (16 tokens).
LVEval_factrecall_en_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "Please answer the following questions based on the given article.\n\nArticle: {context}\n\nPlease answer the following questions based on the above article.\n\nQuestion: {input}\nAnswer:",
                },
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer, "max_out_len": 16},
}

# Evaluation: F1 over English answers; the model (BOT) turn is scored.
LVEval_factrecall_en_eval_cfg = {
    "evaluator": {"type": LVEvalF1Evaluator, "language": "en"},
    "pred_role": "BOT",
}

# Context-length buckets published by the LVEval benchmark.
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"]


def get_dataset_names(dataset_name, length_levels):
    """Return ``"{dataset_name}_{length}"`` names, one per length level.

    Args:
        dataset_name (str): Base dataset name, e.g. ``"factrecall_en"``.
        length_levels (list[str]): Length suffixes such as ``"16k"``.

    Returns:
        list[str]: Names in the same order as ``length_levels``.
    """
    # Comprehension replaces the manual append loop (same order, same items).
    return [f"{dataset_name}_{length}" for length in length_levels]


# One dataset entry per context-length bucket (16k ... 256k); the length
# suffix is folded into both the abbr and the HF config name.
LVEval_factrecall_en_datasets = [
    dict(
        type=LVEvalfactrecallenDataset,
        abbr=f"LVEval_factrecall_en_{length}",
        path="Infinigence/LVEval",
        name=f"factrecall_en_{length}",
        reader_cfg=LVEval_factrecall_en_reader_cfg,
        infer_cfg=LVEval_factrecall_en_infer_cfg,
        eval_cfg=LVEval_factrecall_en_eval_cfg,
    )
    for length in DATASET_LENGTH_LEVEL
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_factrecall_zh_gen_dbee70 import (
        LVEval_factrecall_zh_datasets,
    )  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallzhDataset

# Reader settings: feed the long context plus the question, expect the
# gold answers; LVEval ships only a test split, so both splits point at it.
LVEval_factrecall_zh_reader_cfg = {
    "input_columns": ["context", "input"],
    "output_column": "answers",
    "train_split": "test",
    "test_split": "test",
}

# Inference: zero-shot (ZeroRetriever) generation from a single-turn
# Chinese QA prompt; fact recall needs only short answers (16 tokens).
LVEval_factrecall_zh_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "请基于给定的文章回答下述问题。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:",
                },
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer, "max_out_len": 16},
}

# Evaluation: F1 over Chinese answers; the model (BOT) turn is scored.
LVEval_factrecall_zh_eval_cfg = {
    "evaluator": {"type": LVEvalF1Evaluator, "language": "zh"},
    "pred_role": "BOT",
}

# Context-length buckets published by the LVEval benchmark.
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"]


def get_dataset_names(dataset_name, length_levels):
    """Return ``"{dataset_name}_{length}"`` names, one per length level.

    Args:
        dataset_name (str): Base dataset name, e.g. ``"factrecall_zh"``.
        length_levels (list[str]): Length suffixes such as ``"16k"``.

    Returns:
        list[str]: Names in the same order as ``length_levels``.
    """
    # Comprehension replaces the manual append loop (same order, same items).
    return [f"{dataset_name}_{length}" for length in length_levels]


# One dataset entry per context-length bucket (16k ... 256k); the length
# suffix is folded into both the abbr and the HF config name.
LVEval_factrecall_zh_datasets = [
    dict(
        type=LVEvalfactrecallzhDataset,
        abbr=f"LVEval_factrecall_zh_{length}",
        path="Infinigence/LVEval",
        name=f"factrecall_zh_{length}",
        reader_cfg=LVEval_factrecall_zh_reader_cfg,
        infer_cfg=LVEval_factrecall_zh_infer_cfg,
        eval_cfg=LVEval_factrecall_zh_eval_cfg,
    )
    for length in DATASET_LENGTH_LEVEL
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_hotpotwikiqa_mixup_gen_77ce82 import (
        LVEval_hotpotwikiqa_mixup_datasets,
    )  # noqa: F401, F403
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import (
LVEvalOPTF1Evaluator,
LVEvalhotpotwikiqaDataset,
)

# Reader settings: feed the long context plus the question, expect the
# gold answers; LVEval ships only a test split, so both splits point at it.
LVEval_hotpotwikiqa_mixup_reader_cfg = {
    "input_columns": ["context", "input"],
    "output_column": "answers",
    "train_split": "test",
    "test_split": "test",
}

# Inference: zero-shot (ZeroRetriever) generation from a single-turn
# English multi-hop QA prompt, capped at 64 generated tokens.
LVEval_hotpotwikiqa_mixup_infer_cfg = {
    "prompt_template": {
        "type": PromptTemplate,
        "template": {
            "round": [
                {
                    "role": "HUMAN",
                    "prompt": "Answer the question based on the given passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nArticle: {context}\n\nPlease answer the following question based on the above passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nQuestion: {input}\nAnswer:",
                },
            ],
        },
    },
    "retriever": {"type": ZeroRetriever},
    "inferencer": {"type": GenInferencer, "max_out_len": 64},
}

# Evaluation: LVEval's optimized F1 metric for English; the model (BOT)
# turn is the prediction to score.
LVEval_hotpotwikiqa_mixup_eval_cfg = {
    "evaluator": {"type": LVEvalOPTF1Evaluator, "language": "en"},
    "pred_role": "BOT",
}

# Context-length buckets published by the LVEval benchmark.
DATASET_LENGTH_LEVEL = ["16k", "32k", "64k", "128k", "256k"]


def get_dataset_names(dataset_name, length_levels):
    """Return ``"{dataset_name}_{length}"`` names, one per length level.

    Args:
        dataset_name (str): Base dataset name, e.g. ``"hotpotwikiqa_mixup"``.
        length_levels (list[str]): Length suffixes such as ``"16k"``.

    Returns:
        list[str]: Names in the same order as ``length_levels``.
    """
    # Comprehension replaces the manual append loop (same order, same items).
    return [f"{dataset_name}_{length}" for length in length_levels]


# One dataset entry per context-length bucket (16k ... 256k); the length
# suffix is folded into both the abbr and the HF config name.
LVEval_hotpotwikiqa_mixup_datasets = [
    dict(
        type=LVEvalhotpotwikiqaDataset,
        abbr=f"LVEval_hotpotwikiqa_mixup_{length}",
        path="Infinigence/LVEval",
        name=f"hotpotwikiqa_mixup_{length}",
        reader_cfg=LVEval_hotpotwikiqa_mixup_reader_cfg,
        infer_cfg=LVEval_hotpotwikiqa_mixup_infer_cfg,
        eval_cfg=LVEval_hotpotwikiqa_mixup_eval_cfg,
    )
    for length in DATASET_LENGTH_LEVEL
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from mmengine.config import read_base

# Stable alias config: forwards to the pinned (hash-suffixed) generation
# config so other configs can import a fixed module name even if the
# prompt/config hash changes.
with read_base():
    from .lveval_lic_mixup_gen_01eb0c import (
        LVEval_lic_mixup_datasets,
    )  # noqa: F401, F403
Loading
Loading