[Fix] Fix Slurm ENV (#1392)

1. Support Slurm Cluster 2. Support automatic data download 3. Update InternLM2.5-1.8B/20B-Chat
open-compass · Aug 5, 2024 · c81329b · c81329b
1 parent c09fc79
commit c81329b
Show file tree

Hide file tree

Showing 23 changed files with 762 additions and 277 deletions.
diff --git a/.pre-commit-config-zh-cn.yaml b/.pre-commit-config-zh-cn.yaml
@@ -14,7 +14,9 @@ exclude: |
       docs/zh_cn/advanced_guides/compassbench_v2_0.md |
       opencompass/configs/datasets/ |
       opencompass/configs/models/|
-      opencompass/configs/summarizers/
+      opencompass/configs/summarizers/|
+      opencompass/utils/datasets.py |
+      opencompass/utils/datasets_info.py
     )
 repos:
   - repo: https://gitee.com/openmmlab/mirrors-flake8

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -16,7 +16,9 @@ exclude: |
       docs/zh_cn/advanced_guides/compassbench_v2_0.md |
       opencompass/configs/datasets/ |
       opencompass/configs/models/|
-      opencompass/configs/summarizers/
+      opencompass/configs/summarizers/ |
+      opencompass/utils/datasets.py |
+      opencompass/utils/datasets_info.py
     )
 repos:
   - repo: https://github.com/PyCQA/flake8

diff --git a/configs/datasets/subjective/compassarena/compassarena_compare.py b/configs/datasets/subjective/compassarena/compassarena_compare.py
@@ -150,5 +150,5 @@
             infer_order='double',
             base_models=gpt4,
             summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
-            given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/alpaca_eval/gpt4-turbo'}]
+            given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
         ))
diff --git a/configs/models/hf_internlm/hf_internlm2_5_1_8b_chat.py b/configs/models/hf_internlm/hf_internlm2_5_1_8b_chat.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFacewithChatTemplate
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='internlm2_5-1_8b-chat-hf',
+        path='internlm/internlm2_5-1_8b-chat',
+        max_out_len=2048,
+        batch_size=8,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/hf_internlm/hf_internlm2_5_20b_chat.py b/configs/models/hf_internlm/hf_internlm2_5_20b_chat.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFacewithChatTemplate
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='internlm2_5-20b-chat-hf',
+        path='internlm/internlm2_5-20b-chat',
+        max_out_len=2048,
+        batch_size=8,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='internlm2_5-1_8b-chat-turbomind',
+        path='internlm/internlm2_5-1_8b-chat',
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=8192,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='internlm2_5-20b-chat-turbomind',
+        path='internlm/internlm2_5-20b-chat',
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=8192,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/opencompass/configs/datasets/subjective/compassarena/compassarena_compare.py b/opencompass/configs/datasets/subjective/compassarena/compassarena_compare.py
@@ -150,5 +150,5 @@
             infer_order='double',
             base_models=gpt4,
             summarizer = dict(type=CompassArenaSummarizer, summary_type='half_add'),
-            given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/alpaca_eval/gpt4-turbo'}]
+            given_pred = [{'abbr':'gpt4-turbo', 'path':'./data/subjective/compass_arena/gpt4-turbo'}]
         ))
diff --git a/opencompass/configs/models/hf_internlm/hf_internlm2_5_1_8b_chat.py b/opencompass/configs/models/hf_internlm/hf_internlm2_5_1_8b_chat.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFacewithChatTemplate
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='internlm2_5-1_8b-chat-hf',
+        path='internlm/internlm2_5-1_8b-chat',
+        max_out_len=2048,
+        batch_size=8,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/hf_internlm/hf_internlm2_5_20b_chat.py b/opencompass/configs/models/hf_internlm/hf_internlm2_5_20b_chat.py
@@ -0,0 +1,12 @@
+from opencompass.models import HuggingFacewithChatTemplate
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='internlm2_5-20b-chat-hf',
+        path='internlm/internlm2_5-20b-chat',
+        max_out_len=2048,
+        batch_size=8,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_1_8b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='internlm2_5-1_8b-chat-turbomind',
+        path='internlm/internlm2_5-1_8b-chat',
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=1),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=8192,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=1),
+    )
+]
diff --git a/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py b/opencompass/configs/models/hf_internlm/lmdeploy_internlm2_5_20b_chat.py
@@ -0,0 +1,15 @@
+from opencompass.models import TurboMindModelwithChatTemplate
+
+models = [
+    dict(
+        type=TurboMindModelwithChatTemplate,
+        abbr='internlm2_5-20b-chat-turbomind',
+        path='internlm/internlm2_5-20b-chat',
+        engine_config=dict(session_len=8192, max_batch_size=16, tp=2),
+        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9, max_new_tokens=2048),
+        max_seq_len=8192,
+        max_out_len=2048,
+        batch_size=16,
+        run_cfg=dict(num_gpus=2),
+    )
+]
diff --git a/opencompass/datasets/charm.py b/opencompass/datasets/charm.py
@@ -8,7 +8,7 @@
 from opencompass.openicl.icl_evaluator import BaseEvaluator, LMEvaluator
 from opencompass.registry import (ICL_EVALUATORS, LOAD_DATASET,
                                   TEXT_POSTPROCESSORS)
-from opencompass.utils import build_dataset_from_cfg
+from opencompass.utils import build_dataset_from_cfg, get_data_path
 
 from .base import BaseDataset
 
@@ -147,6 +147,7 @@ class CharmDataset(BaseDataset):
 
     @staticmethod
     def load(path: str, name: str):
+        path = get_data_path(path, local_mode=True)
         with open(osp.join(path, f'{name}.json'), 'r', encoding='utf-8') as f:
             data = json.load(f)['examples']
         dataset = Dataset.from_list(data)

diff --git a/opencompass/datasets/subjective/alignbench.py b/opencompass/datasets/subjective/alignbench.py
@@ -7,6 +7,7 @@
 from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET
+from opencompass.utils import get_data_path
 
 from .subjective_cmp import SubjectiveCmpDataset
 
@@ -15,6 +16,8 @@ class Config:
 
     def __init__(self, alignment_bench_config_path,
                  alignment_bench_config_name) -> None:
+        alignment_bench_config_path = get_data_path(
+            alignment_bench_config_path, local_mode=True)
         config_file_path = osp.join(alignment_bench_config_path,
                                     alignment_bench_config_name + '.json')
         with open(config_file_path, 'r') as config_file:

diff --git a/opencompass/datasets/subjective/arena_hard.py b/opencompass/datasets/subjective/arena_hard.py
@@ -4,6 +4,7 @@
 from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET
+from opencompass.utils import get_data_path
 
 from ..base import BaseDataset
 
@@ -12,6 +13,7 @@
 class ArenaHardDataset(BaseDataset):
 
     def load(self, path: str, name: str, *args, **kwargs):
+        path = get_data_path(path, local_mode=True)
         filename = osp.join(path, f'{name}.jsonl')
         dataset = DatasetDict()
         raw_data = []

diff --git a/opencompass/datasets/subjective/mtbench.py b/opencompass/datasets/subjective/mtbench.py
@@ -6,6 +6,7 @@
 from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET
+from opencompass.utils import get_data_path
 
 from ..base import BaseDataset
 
@@ -172,6 +173,7 @@ def load(self,
              multi_turn=True,
              *args,
              **kwargs):
+        path = get_data_path(path, local_mode=True)
         filename = osp.join(path, f'{name}.json')
         dataset = DatasetDict()
         raw_data = []

diff --git a/opencompass/datasets/subjective/subjective_cmp.py b/opencompass/datasets/subjective/subjective_cmp.py
@@ -4,6 +4,7 @@
 from datasets import Dataset, DatasetDict
 
 from opencompass.registry import LOAD_DATASET
+from opencompass.utils import get_data_path
 
 from ..base import BaseDataset
 
@@ -12,6 +13,7 @@
 class SubjectiveCmpDataset(BaseDataset):
 
     def load(self, path: str, name: str, *args, **kwargs):
+        path = get_data_path(path, local_mode=True)
         filename = osp.join(path, f'{name}.json')
         dataset = DatasetDict()
         raw_data = []

diff --git a/opencompass/models/openai_api.py b/opencompass/models/openai_api.py
@@ -311,11 +311,15 @@ def get_token_len(self, prompt: str) -> int:
             try:
                 enc = self.tiktoken.encoding_for_model(self.tokenizer_path)
                 return len(enc.encode(prompt))
-            except Exception:
+            except Exception as e:
+                self.logger.warning(f'{e}, tiktoken encoding cannot load '
+                                    f'{self.tokenizer_path}')
                 from transformers import AutoTokenizer
                 if self.hf_tokenizer is None:
                     self.hf_tokenizer = AutoTokenizer.from_pretrained(
-                        self.tokenizer_path)
+                        self.tokenizer_path, trust_remote_code=True)
+                    self.logger.info(
+                        f'Tokenizer is loaded from {self.tokenizer_path}')
                 return len(self.hf_tokenizer(prompt).input_ids)
         else:
             enc = self.tiktoken.encoding_for_model(self.path)
@@ -424,14 +428,14 @@ def _generate(self, input: PromptList | str, max_out_len: int,
                 messages.append(msg)
 
         # Hold out 100 tokens due to potential errors in tiktoken calculation
-        try:
-            max_out_len = min(
-                max_out_len,
-                context_window - self.get_token_len(str(input)) - 100)
-        except KeyError:
-            max_out_len = max_out_len
-        if max_out_len <= 0:
-            return ''
+        # try:
+        #     max_out_len = min(
+        #         max_out_len,
+        #         context_window - self.get_token_len(str(input)) - 100)
+        # except KeyError:
+        #     max_out_len = max_out_len
+        # if max_out_len <= 0:
+        #     return ''
 
         num_retries = 0
         while num_retries < self.retry:

diff --git a/opencompass/tasks/subjective_eval.py b/opencompass/tasks/subjective_eval.py
@@ -15,7 +15,7 @@
 from opencompass.tasks.base import BaseTask
 from opencompass.tasks.openicl_eval import extract_role_pred
 from opencompass.utils import (build_dataset_from_cfg, dataset_abbr_from_cfg,
-                               deal_with_judge_model_abbr,
+                               deal_with_judge_model_abbr, get_data_path,
                                get_infer_output_path, get_logger,
                                model_abbr_from_cfg, task_abbr_from_cfg)
 
@@ -140,6 +140,7 @@ def _load_model_pred(
         for given_pred in given_preds:
             abbr = given_pred['abbr']
             path = given_pred['path']
+            path = get_data_path(path, local_mode=True)
             if abbr == model_cfg['abbr']:
                 filename = osp.join(path, osp.basename(filename))
         # Get partition name