Skip to content

Commit

Permalink
[Feature] Add daily test case (open-compass#864)
Browse files Browse the repository at this point in the history
* add daily test case

* Update pr-run-test.yml

* Update daily-run-test.yml

* Update daily-run-test.yml

* Update pr-run-test.yml

---------

Co-authored-by: zhulin1 <[email protected]>
  • Loading branch information
zhulinJulia24 and zhulin1 authored Feb 2, 2024
1 parent d0ac8d0 commit 166265c
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 13 deletions.
93 changes: 93 additions & 0 deletions .github/scripts/oc_score_assert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import csv
import os

import pytest
import yaml

output_path = 'regression_result_daily'

model_list = ['internlm-7b-hf', 'internlm-chat-7b-hf']
dataset_list = ['ARC-c', 'chid-dev', 'chid-test', 'openai_humaneval']


@pytest.fixture()
def baseline_scores(request):
config_path = os.path.join(request.config.rootdir,
'.github/scripts/oc_score_baseline.yaml')
with open(config_path) as f:
config = yaml.load(f.read(), Loader=yaml.SafeLoader)
return config


@pytest.fixture()
def result_scores():
file = find_csv_files(output_path)
if file is None:
return None
return read_csv_file(file)


@pytest.mark.usefixtures('result_scores')
@pytest.mark.usefixtures('baseline_scores')
class TestChat:
"""Test cases for chat model."""

@pytest.mark.parametrize('model, dataset', [(p1, p2) for p1 in model_list
for p2 in dataset_list])
def test_demo_default(self, baseline_scores, result_scores, model,
dataset):
base_score = baseline_scores.get(model).get(dataset)
result_score = result_scores.get(model).get(dataset)
assert_score(result_score, base_score)


def assert_score(score, baseline):
if score is None or score == '-':
assert False, 'value is none'
if float(score) < (baseline * 1.03) and float(score) > (baseline * 0.97):
print(score + ' between ' + str(baseline * 0.97) + ' and ' +
str(baseline * 1.03))
assert True
else:
assert False, score + ' not between ' + str(
baseline * 0.97) + ' and ' + str(baseline * 1.03)


def find_csv_files(directory):
csv_files = []
for root, dirs, files in os.walk(directory):
for file in files:
if file.endswith('.csv'):
csv_files.append(os.path.join(root, file))
if len(csv_files) > 1:
raise 'have more than 1 result file, please check the result manually'
if len(csv_files) == 0:
return None
return csv_files[0]


def read_csv_file(file_path):
with open(file_path, 'r') as csvfile:
reader = csv.DictReader(csvfile)
filtered_data = []

for row in reader:
filtered_row = {
k: v
for k, v in row.items()
if k not in ['version', 'metric', 'mode']
}
filtered_data.append(filtered_row)

result = {}
for data in filtered_data:
dataset = data.get('dataset')
for key in data.keys():
if key == 'dataset':
continue
else:
if key in result.keys():
result.get(key)[dataset] = data.get(key)
else:
result[key] = {dataset: data.get(key)}
return result
11 changes: 11 additions & 0 deletions .github/scripts/oc_score_baseline.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
internlm-7b-hf:
ARC-c: 36.27
chid-dev: 81.68
chid-test: 83.67
openai_humaneval: 10.37

internlm-chat-7b-hf:
ARC-c: 36.95
chid-dev: 71.78
chid-test: 76.87
openai_humaneval: 21.34
74 changes: 74 additions & 0 deletions .github/workflows/daily-run-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
name: daily_run_test

on:
workflow_dispatch:
schedule:
- cron: '56 16 * * *'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

env:
CONDA_ENV: opencompass_regression
PIP_CACHE_PATH: /cpfs01/user/qa-llm-cicd/.cache/pip
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub

jobs:
daily_run_test:
runs-on: self-hosted
environment: 'prod'
timeout-minutes: 240 #4hours
steps:
- name: Clone repository
uses: actions/checkout@v2
- name: Prepare - create conda env and install torch
run: |
eval "$(conda shell.bash hook)"
conda create -y --name ${{env.CONDA_ENV}} python=3.10
conda activate ${{env.CONDA_ENV}}
pip install torch torchvision torchaudio --cache-dir ${{env.PIP_CACHE_PATH}} --index-url https://download.pytorch.org/whl/cu118
conda info --envs
- name: Prepare - Pip install code
run: |
eval "$(conda shell.bash hook)"
conda activate ${{env.CONDA_ENV}}
pip install -e . --cache-dir ${{env.PIP_CACHE_PATH}}
pip install human_eval transformers==4.33.0 --cache-dir ${{env.PIP_CACHE_PATH}}
conda info --envs
- name: Prepare - prepare data and hf model
run: |
cp -r ${{env.USERSPACE_PREFIX}}/data .
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
- name: Run test
run: |
eval "$(conda shell.bash hook)"
conda activate ${{env.CONDA_ENV}}
conda info --envs
rm -rf regression_result_daily
export from_tf=TRUE
python3 run.py --models hf_internlm_chat_7b hf_internlm_7b --datasets FewCLUE_chid_ppl humaneval_gen ARC_c_ppl --work-dir regression_result_daily --debug
- name: Get result
run: |
pip install pytest --cache-dir ${{env.PIP_CACHE_PATH}}
pytest -s -v --color=yes .github/scripts/oc_score_assert.py
- name: Remove Conda Env
if: always()
run: |
eval "$(conda shell.bash hook)"
conda env remove --name ${{env.CONDA_ENV}}
conda info --envs
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'develop' || github.ref_name == 'main') }}
needs: [daily_run_test]
environment: 'prod'
timeout-minutes: 5
runs-on: self-hosted
steps:
- name: notify
run: |
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Opencompass- pr test failed","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
28 changes: 15 additions & 13 deletions .github/workflows/pr-run-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,30 @@ concurrency:
cancel-in-progress: true

env:
CONDA_ENV: opencompass_regression_daily
CONDA_ENV: opencompass_base
USERSPACE_PREFIX: /cpfs01/user/qa-llm-cicd
HF_CACHE_PATH: /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub

jobs:
pr_run_test:
runs-on: self-hosted
environment: 'prod'
timeout-minutes: 20
timeout-minutes: 30
steps:
- name: Clone repository
uses: actions/checkout@v2
- name: Prepare - create conda env and install code
- name: Prepare - Install opencompass
run: |
eval "$(conda shell.bash hook)"
conda create --name ${{env.CONDA_ENV}} --clone opencompass_base --offline
conda activate ${{env.CONDA_ENV}}
python3 -m pip install -e .
python3 -m pip uninstall opencompass -y
python3 -m pip install -e . --cache-dir ${{env.USERSPACE_PREFIX}}/.cache/pip
conda info --envs
- name: Prepare - prepare data and hf model
run: |
cp -r /cpfs01/user/qa-llm-cicd/data .
cp -r ${{env.USERSPACE_PREFIX}}/data .
rm -rf ~/.cache/huggingface/hub -f && mkdir ~/.cache -p && mkdir ~/.cache/huggingface -p
ln -s /cpfs01/shared/public/public_hdd/llmeval/model_weights/hf_hub ~/.cache/huggingface/hub
ln -s ${{env.HF_CACHE_PATH}} ~/.cache/huggingface/hub
export HF_DATASETS_OFFLINE=1; export TRANSFORMERS_OFFLINE=1;
- name: Run test
run: |
Expand All @@ -49,21 +51,21 @@ jobs:
rm -rf regression_result
python3 run.py --models hf_internlm_chat_7b --datasets siqa_gen --work-dir regression_result --debug
- name: Get result
if: always()
run: |
score=$(sed -n '$p' regression_result/*/summary/*.csv | awk -F ',' '{print $NF}')
if (( ${score%.*} >= 70 && ${score%.*} <= 80 )); then
echo "score is $score between 70 and 80"
if (( ${score%.*} >= 70 && ${score%.*} <= 75 )); then
echo "score is $score between 70 and 75"
else
echo "score is $score not between 70 and 80"
echo "score is $score not between 70 and 75"
exit 1
fi
rm -rf regression_result
- name: Remove Conda Env
- name: Uninstall opencompass
if: always()
run: |
eval "$(conda shell.bash hook)"
conda env remove --name ${{env.CONDA_ENV}}
conda activate ${{env.CONDA_ENV}}
python3 -m pip uninstall opencompass -y
conda info --envs
notify_to_feishu:
Expand Down

0 comments on commit 166265c

Please sign in to comment.