(finetune) zhengqing@dell-Precision-7920-Tower:~/DS+/use_LLM_test_xnli/ptuning$ python finetune.py data/ /home/zhengqing/DS+/LLM/glm-4-9b ptuning_v2.yaml
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:01<00:00, 7.33it/s]
trainable params: 10,485,760 || all params: 9,410,437,120 || trainable%: 0.1114
Map: 0%| | 0/392702 [00:00<?, ? examples/s]
╭────────────────────────────────────────────────────────────── Traceback (most recent call last) ──────────────────────────────────────────────────────────────╮
│ /home/zhengqing/DS+/use_LLM_test_xnli/ptuning/finetune.py:425 in main │
│ │
│ 422 │ ) │
│ 423 │ data_manager = DataManager(data_dir, ft_config.data_config) │
│ 424 │ │
│ ❱ 425 │ train_dataset = data_manager.get_dataset( │
│ 426 │ │ Split.TRAIN, │
│ 427 │ │ functools.partial( │
│ 428 │ │ │ process_batch, │
│ │
│ /home/zhengqing/DS+/use_LLM_test_xnli/ptuning/finetune.py:218 in get_dataset │
│ │
│ 215 │ │ │ remove_columns = orig_dataset.column_names │
│ 216 │ │ else: │
│ 217 │ │ │ remove_columns = None │
│ ❱ 218 │ │ return orig_dataset.map( │
│ 219 │ │ │ process_fn, │
│ 220 │ │ │ batched=batched, │
│ 221 │ │ │ remove_columns=remove_columns, │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/datasets/arrow_dataset.py:602 in wrapper │
│ │
│ 599 │ │ else: │
│ 600 │ │ │ self: "Dataset" = kwargs.pop("self") │
│ 601 │ │ # apply actual function │
│ ❱ 602 │ │ out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs) │
│ 603 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 604 │ │ for dataset in datasets: │
│ 605 │ │ │ # Remove task templates if a column mapping of the template is no longer val │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/datasets/arrow_dataset.py:567 in wrapper │
│ │
│ 564 │ │ │ "output_all_columns": self._output_all_columns, │
│ 565 │ │ } │
│ 566 │ │ # apply actual function │
│ ❱ 567 │ │ out: Union["Dataset", "DatasetDict"] = func(self, args, **kwargs) │
│ 568 │ │ datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [ou │
│ 569 │ │ # re-apply format to the output │
│ 570 │ │ for dataset in datasets: │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/datasets/arrow_dataset.py:3161 in map │
│ │
│ 3158 │ │ │ │ │ total=pbar_total, │
│ 3159 │ │ │ │ │ desc=desc or "Map", │
│ 3160 │ │ │ │ ) as pbar: │
│ ❱ 3161 │ │ │ │ │ for rank, done, content in Dataset._map_single(**dataset_kwargs): │
│ 3162 │ │ │ │ │ │ if done: │
│ 3163 │ │ │ │ │ │ │ shards_done += 1 │
│ 3164 │ │ │ │ │ │ │ logger.debug(f"Finished processing shard number {rank} of {n │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/datasets/arrow_dataset.py:3552 in _map_single │
│ │
│ 3549 │ │ │ │ │ │ │ range((slice(i, i + batch_size).indices(shard.num_rows))) │
│ 3550 │ │ │ │ │ │ ) # Something simpler? │
│ 3551 │ │ │ │ │ │ try: │
│ ❱ 3552 │ │ │ │ │ │ │ batch = apply_function_on_filtered_inputs( │
│ 3553 │ │ │ │ │ │ │ │ batch, │
│ 3554 │ │ │ │ │ │ │ │ indices, │
│ 3555 │ │ │ │ │ │ │ │ check_same_num_examples=len(shard.list_indexes()) > 0, │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/datasets/arrow_dataset.py:3421 in apply_function_on_filtered_inputs │
│ │
│ 3418 │ │ │ │ additional_args += (effective_indices,) │
│ 3419 │ │ │ if with_rank: │
│ 3420 │ │ │ │ additional_args += (rank,) │
│ ❱ 3421 │ │ │ processed_inputs = function(*fn_args, *additional_args, **fn_kwargs) │
│ 3422 │ │ │ if isinstance(processed_inputs, LazyDict): │
│ 3423 │ │ │ │ processed_inputs = { │
│ 3424 │ │ │ │ │ k: v for k, v in processed_inputs.data.items() if k not in processed │
│ │
│ /home/zhengqing/DS+/use_LLM_test_xnli/ptuning/finetune.py:252 in process_batch │
│ │
│ 249 │ │ input_ids = [151331, 151333] │
│ 250 │ │ loss_masks = [False, False] │
│ 251 │ │ if combine: │
│ ❱ 252 │ │ │ new_input_ids = tokenizer.apply_chat_template( │
│ 253 │ │ │ │ conv, tokenize=True, return_dict=False │
│ 254 │ │ │ ) │
│ 255 │ │ │ input_ids = new_input_ids │
│ │
│ /home/zhengqing/.cache/huggingface/modules/transformers_modules/glm-4-9b/tokenization_chatglm.py:220 in apply_chat_template │
│ │
│ 217 │ │ │ raise ValueError("Invalid conversation format") │
│ 218 │ │ │
│ 219 │ │ if tokenize: │
│ ❱ 220 │ │ │ output = self.batch_encode_plus( │
│ 221 │ │ │ │ [result] if isinstance(result[0], int) else result, │
│ 222 │ │ │ │ padding=padding, │
│ 223 │ │ │ │ truncation=truncation, │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:3150 in batch_encode_plus │
│ │
│ 3147 │ │ │ **kwargs, │
│ 3148 │ │ ) │
│ 3149 │ │ │
│ ❱ 3150 │ │ return self._batch_encode_plus( │
│ 3151 │ │ │ batch_text_or_text_pairs=batch_text_or_text_pairs, │
│ 3152 │ │ │ add_special_tokens=add_special_tokens, │
│ 3153 │ │ │ padding_strategy=padding_strategy, │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/transformers/tokenization_utils.py:892 in _batch_encode_plus │
│ │
│ 889 │ │ │ second_ids = get_input_ids(pair_ids) if pair_ids is not None else None │
│ 890 │ │ │ input_ids.append((first_ids, second_ids)) │
│ 891 │ │ │
│ ❱ 892 │ │ batch_outputs = self._batch_prepare_for_model( │
│ 893 │ │ │ input_ids, │
│ 894 │ │ │ add_special_tokens=add_special_tokens, │
│ 895 │ │ │ padding_strategy=padding_strategy, │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/transformers/tokenization_utils.py:970 in _batch_prepare_for_model │
│ │
│ 967 │ │ │ │ │ batch_outputs[key] = [] │
│ 968 │ │ │ │ batch_outputs[key].append(value) │
│ 969 │ │ │
│ ❱ 970 │ │ batch_outputs = self.pad( │
│ 971 │ │ │ batch_outputs, │
│ 972 │ │ │ padding=padding_strategy.value, │
│ 973 │ │ │ max_length=max_length, │
│ │
│ /home/zhengqing/DS+/use_LLM_test_xnli/ptuning/finetune.py:357 in patched_pad │
│ │
│ 354 │ original_pad = tokenizer.pad │
│ 355 │ def patched_pad(*args, **kwargs): │
│ 356 │ │ kwargs.pop('padding_side', None) # remove the 'padding_side' argument │
│ ❱ 357 │ │ return original_pad(*args, **kwargs) │
│ 358 │ tokenizer.pad = patched_pad │
│ 359 │ │
│ 360 │ if peft_config is not None: │
│ │
│ /home/zhengqing/anaconda3/envs/finetune/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:3366 in pad │
│ │
│ 3363 │ │ batch_outputs = {} │
│ 3364 │ │ for i in range(batch_size): │
│ 3365 │ │ │ inputs = {k: v[i] for k, v in encoded_inputs.items()} │
│ ❱ 3366 │ │ │ outputs = self._pad( │
│ 3367 │ │ │ │ inputs, │
│ 3368 │ │ │ │ max_length=max_length, │
│ 3369 │ │ │ │ padding_strategy=padding_strategy, │
╰───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
TypeError: ChatGLM4Tokenizer._pad() got an unexpected keyword argument 'padding_side'
I'm using the demo fine-tuning code. Why does it raise this error? Looking for an explanation: TypeError: ChatGLM4Tokenizer._pad() got an unexpected keyword argument 'padding_side'
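For context, the traceback suggests one likely cause: newer transformers releases forward a `padding_side` keyword from `pad()` down to the tokenizer's internal `_pad()` method, and the custom `ChatGLM4Tokenizer._pad()` shipped in `tokenization_chatglm.py` does not accept it. The `patched_pad` wrapper in `finetune.py` strips the keyword from calls to `tokenizer.pad`, but `pad()` re-injects it when it calls `self._pad(...)`, so the error still surfaces. Below is a minimal sketch of a possible workaround that patches `_pad` instead; `strip_padding_side` is a hypothetical helper name, and this assumes the tokenizer object is already loaded, not that this is the official fix.

```python
import functools


def strip_padding_side(tokenizer):
    """Hypothetical helper: wrap the tokenizer's internal _pad so it ignores
    the 'padding_side' keyword that newer transformers versions pass down."""
    original_pad = tokenizer._pad  # bound method of the loaded ChatGLM4Tokenizer

    @functools.wraps(original_pad)
    def patched_pad(*args, **kwargs):
        # ChatGLM4Tokenizer._pad does not accept this keyword, so drop it.
        kwargs.pop("padding_side", None)
        return original_pad(*args, **kwargs)

    tokenizer._pad = patched_pad  # shadow the class method on this instance
    return tokenizer


# Usage (assumed): tokenizer = strip_padding_side(tokenizer) right after loading it.
```

Alternatively, pinning transformers to a release that predates the `padding_side` keyword, or editing the model's `tokenization_chatglm.py` so that `_pad` accepts and discards the extra argument, should also avoid the error.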