This repository has been archived by the owner on May 29, 2023. It is now read-only.

Commit

Merge pull request #130 from iKernels/feature-#129

Feature #129

lucadiliello authored Aug 27, 2022
2 parents 2ce2fff + 7f7c418 commit b3f185c
Showing 27 changed files with 77 additions and 93 deletions.
13 changes: 12 additions & 1 deletion CHANGELOG.md
@@ -5,6 +5,17 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).


## 0.7.12

- Fixed `num_training_steps` for lightning 1.7.

- Changed all static methods `add_*_args` to the standard form `add_argparse_args`.

- Deprecated strategies based on DataParallel, following `pytorch-lightning`, and added the MPS accelerator.

- Fixed deprecated classes in lightning 1.7.


## 0.7.10

- Moved `pre_trained_dir` hyperparameter from `Defaults` to `TransformersModelCheckpointCallback`.
@@ -67,7 +78,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Added `SwappingLanguageModeling` technique and tests.

- Added `add_adapter_specific_args` method to `SuperAdapter` to allow adding parameters to the CLI.
- Added `add_argparse_args` method to `SuperAdapter` to allow adding parameters to the CLI.

- Fixed typo with which `AdapterDataModule` was not receiving `collate_fn` argument.

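The 0.7.12 notes above cover the rename of the per-component `add_*_specific_args` hooks to a single `add_argparse_args` static method. Below is a minimal sketch of what that migration looks like for a user-defined optimizer; `MyOptimizer` and its `--learning_rate` flag are purely illustrative, and the `SuperOptimizer` import path is assumed from the file layout shown later in this diff.

```python
from argparse import ArgumentParser

from transformers_lightning.optimizers.super_optimizer import SuperOptimizer


class MyOptimizer(SuperOptimizer):
    """Hypothetical optimizer, used only to show the renamed argparse hook."""

    @staticmethod
    def add_argparse_args(parser: ArgumentParser):
        # previously named add_optimizer_specific_args; call the parent hook first
        super(MyOptimizer, MyOptimizer).add_argparse_args(parser)
        parser.add_argument('--learning_rate', type=float, default=1e-4)


parser = ArgumentParser()
MyOptimizer.add_argparse_args(parser)
hyperparameters = parser.parse_args([])
```

The same pattern applies to adapters, datamodules, callbacks, loggers and schedulers, as the individual file diffs below show.
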
8 changes: 4 additions & 4 deletions README.md
@@ -108,14 +108,14 @@ if __name__ == '__main__':
parser.add_argument('--name', type=str, required=True, help='Name of the experiment, will be used to correctly retrieve checkpoints and logs')

# I/O folders
DefaultConfig.add_defaults_args(parser)
DefaultConfig.add_argparse_args(parser)

# add model specific cli arguments
TransformerModel.add_model_specific_args(parser)
YourDataModule.add_datamodule_specific_args(parser)
TransformerModel.add_argparse_args(parser)
YourDataModule.add_argparse_args(parser)

# add callback / logger specific cli arguments
callbacks.TransformersModelCheckpointCallback.add_callback_specific_args(parser)
callbacks.TransformersModelCheckpointCallback.add_argparse_args(parser)

# add all the available trainer options to argparse
# ie: now --gpus --num_nodes ... --fast_dev_run all work in the cli
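
For completeness, a hedged sketch of how the script above typically continues under pytorch-lightning 1.7, turning the parsed namespace into a `Trainer`; the `--name` value is only an example and the snippet is not taken from the repository.

```python
from argparse import ArgumentParser

import pytorch_lightning as pl

parser = ArgumentParser()
parser.add_argument('--name', type=str, required=True,
                    help='Name of the experiment, will be used to correctly retrieve checkpoints and logs')

# add all the available trainer options to argparse, then build the Trainer from them
pl.Trainer.add_argparse_args(parser)
hyperparameters = parser.parse_args(['--name', 'example'])

trainer = pl.Trainer.from_argparse_args(hyperparameters)
```
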
Original file line number Diff line number Diff line change
@@ -10,21 +10,15 @@
@pytest.mark.parametrize("batch_size", [1, 3])
@pytest.mark.parametrize("accumulate_grad_batches", [1, 11])
@pytest.mark.parametrize("iterable", [False, True])
@pytest.mark.parametrize("devices", [1, 2, 4, 8])
@pytest.mark.skipif(not torch.cuda.is_available(), reason="Skipping GPU tests because this machine has no GPUs")
def test_datamodule_gpu_dp(num_workers, batch_size, accumulate_grad_batches, devices, iterable):

# cannot do GPU training without enough devices
if torch.cuda.device_count() < devices:
pytest.skip()
@pytest.mark.skipif(not torch.has_mps, reason="Skipping MPS tests because this machine has no MPS device")
def test_datamodule_mps(num_workers, batch_size, accumulate_grad_batches, iterable):

do_test_datamodule(
num_workers,
batch_size,
accumulate_grad_batches,
iterable,
strategy='dp',
accelerator='gpu',
devices=devices,
accelerator='mps',
devices=1,
num_sanity_val_steps=0,
)
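
The rewritten test above targets the new MPS accelerator. A minimal sketch of the matching Trainer configuration, using the same availability check as the test and falling back to CPU otherwise (the fallback is illustrative):

```python
import pytorch_lightning as pl
import torch

# only request the MPS accelerator when the machine actually has an MPS device
if torch.has_mps:
    trainer = pl.Trainer(accelerator='mps', devices=1)
else:
    trainer = pl.Trainer(accelerator='cpu')
```
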
1 change: 1 addition & 0 deletions tests/helpers.py
@@ -29,6 +29,7 @@
padding='max_length',
max_length=128,
pin_memory=False,
prefetch_factor=2,
)


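The shared test arguments above now pass `prefetch_factor=2` through to the dataloaders. A small, self-contained sketch of what that parameter does on a plain `DataLoader` (the tensor dataset is a stand-in):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(100).float())

# with num_workers > 0, each worker keeps `prefetch_factor` batches loaded ahead of time
loader = DataLoader(dataset, batch_size=3, num_workers=2,
                    pin_memory=False, prefetch_factor=2)
```
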
9 changes: 9 additions & 0 deletions tests/models/test_models_cpu.py
@@ -14,3 +14,12 @@ def test_fix_max_steps_cpu(max_epochs, accumulate_grad_batches, batch_size):
batch_size,
accelerator="cpu",
)

do_test_fix_max_steps(
max_epochs,
accumulate_grad_batches,
batch_size,
accelerator="cpu",
strategy="ddp",
devices=10,
)
24 changes: 0 additions & 24 deletions tests/models/test_models_dp.py

This file was deleted.

2 changes: 1 addition & 1 deletion tests/test_optimizers.py
@@ -47,7 +47,7 @@ def test_optimizers(optimizer_class, batch_size):
)

parser = ArgumentParser()
optimizer_class.add_optimizer_specific_args(parser)
optimizer_class.add_argparse_args(parser)
hyperparameters = Namespace(**vars(hyperparameters), **vars(parser.parse_args("")))

hyperparameters.optimizer_class = optimizer_class
5 changes: 3 additions & 2 deletions tests/test_schedulers.py
@@ -3,7 +3,8 @@
import pytest
import pytorch_lightning as pl
import torch
from transformers import AdamW, BertTokenizer
from torch.optim import AdamW
from transformers import BertTokenizer

from tests.helpers import DummyDataModule, DummyTransformerModel, standard_args
from transformers_lightning.schedulers import (
@@ -87,7 +88,7 @@ def test_schedulers(scheduler_class, parameters, expected_lrs):
**parameters,
)

scheduler_class.add_scheduler_specific_args(ArgumentParser())
scheduler_class.add_argparse_args(ArgumentParser())

class SchedulerModel(DummyTransformerModel):

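The scheduler tests now import `AdamW` from `torch.optim` rather than from `transformers`, whose own `AdamW` has been deprecated. A hedged sketch of the drop-in replacement, with a toy module standing in for the real model:

```python
import torch
from torch.optim import AdamW  # replaces the deprecated transformers.AdamW

model = torch.nn.Linear(8, 2)  # stand-in for the actual transformer model
optimizer = AdamW(model.parameters(), lr=1e-4, eps=1e-8, weight_decay=0.0)
```
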
1 change: 1 addition & 0 deletions transformers_lightning/__init__.py
@@ -3,6 +3,7 @@
import transformers_lightning.datamodules # noqa: F401
import transformers_lightning.datasets # noqa: F401
import transformers_lightning.defaults # noqa: F401
import transformers_lightning.info # noqa: F401
import transformers_lightning.language_modeling # noqa: F401
import transformers_lightning.loggers # noqa: F401
import transformers_lightning.models # noqa: F401
2 changes: 1 addition & 1 deletion transformers_lightning/adapters/super_adapter.py
@@ -48,5 +48,5 @@ def preprocess_line(self, line: list) -> list:
return line

@staticmethod
def add_adapter_specific_args(parser: ArgumentParser) -> ArgumentParser:
def add_argparse_args(parser: ArgumentParser) -> ArgumentParser:
r""" Add here arguments that will be available from the command line. """
2 changes: 1 addition & 1 deletion transformers_lightning/callbacks/README.md
@@ -13,7 +13,7 @@ This callback can be used to save a checkpoint after every `k` steps, after ever
>>> parser = ArgumentParser()
>>> ...
>>> # add callback / logger specific parameters
>>> callbacks.TransformersModelCheckpointCallback.add_callback_specific_args(parser)
>>> callbacks.TransformersModelCheckpointCallback.add_argparse_args(parser)
>>> ...
>>> hyperparameters = parser.parse_args()
```
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
import shutil
from argparse import ArgumentParser

from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.callbacks.callback import Callback
from pytorch_lightning.utilities.rank_zero import rank_zero_warn

from transformers_lightning.utils import dump_json, is_simple
@@ -157,7 +157,7 @@ def on_validation_end(self, trainer, pl_module):
self.save_model(pl_module, epoch=trainer.current_epoch, step=trainer.global_step)

@staticmethod
def add_callback_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
r""" Add callback_specific arguments to parser. """
parser.add_argument(
'--checkpoint_interval',
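
Wiring this callback into a training script looks roughly like the sketch below. It assumes the parsed namespace carries everything the callback needs (for instance the directories registered by `DefaultConfig.add_argparse_args`), so treat it as illustrative rather than a complete recipe.

```python
from argparse import ArgumentParser

import pytorch_lightning as pl
from transformers_lightning import callbacks
from transformers_lightning.defaults import DefaultConfig

parser = ArgumentParser()
DefaultConfig.add_argparse_args(parser)
callbacks.TransformersModelCheckpointCallback.add_argparse_args(parser)
hyperparameters = parser.parse_args()

save_transformers_callback = callbacks.TransformersModelCheckpointCallback(hyperparameters)
trainer = pl.Trainer(callbacks=[save_transformers_callback])
```
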
2 changes: 1 addition & 1 deletion transformers_lightning/datamodules/super_datamodule.py
@@ -118,7 +118,7 @@ def predict_dataloader(self):
return None

@staticmethod
def add_datamodule_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
parser.add_argument(
'--num_workers',
required=False,
2 changes: 1 addition & 1 deletion transformers_lightning/defaults/__init__.py
@@ -9,7 +9,7 @@ class DefaultConfig:
"""

@staticmethod
def add_defaults_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
parser.add_argument(
'--output_dir',
type=str,
4 changes: 2 additions & 2 deletions transformers_lightning/info.py
@@ -1,7 +1,7 @@
__version__ = '0.7.11'
__version__ = '0.7.12'
__author__ = 'Luca Di Liello and Matteo Gabburo'
__author_email__ = '[email protected]'
__license__ = 'GNU GENERAL PUBLIC LICENSE v2'
__copyright__ = f'Copyright (c) 2020-2021, {__author__}.'
__copyright__ = f'Copyright (c) 2020-2022, {__author__}.'
__homepage__ = 'https://github.com/iKernels/transformers-lightning'
__docs__ = "Utilities to use Transformers with Pytorch-Lightning"
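
Since the package `__init__` now imports `transformers_lightning.info` (see the diff above), the bumped version string is reachable at runtime:

```python
import transformers_lightning

# prints 0.7.12 after this release
print(transformers_lightning.info.__version__)
```
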
8 changes: 4 additions & 4 deletions transformers_lightning/loggers/jsonboard_logger.py
@@ -7,16 +7,16 @@

import numpy as np
import torch
from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
from pytorch_lightning.utilities import rank_zero_only
from pytorch_lightning.loggers.logger import Logger, rank_zero_experiment
from pytorch_lightning.utilities.cloud_io import get_filesystem
from pytorch_lightning.utilities.logger import _convert_params, _flatten_dict
from pytorch_lightning.utilities.logger import _sanitize_params as _utils_sanitize_params
from pytorch_lightning.utilities.rank_zero import rank_zero_only

logger = logging.getLogger(__name__)


class JsonBoardLogger(LightningLoggerBase):
class JsonBoardLogger(Logger):
r"""
Log to local file system in `JsonBoard <https://github.com/lucadiliello/jsonboard>`_ format.
@@ -247,7 +247,7 @@ def __setstate__(self, state: Dict[Any, Any]):
self.__dict__.update(state)

@staticmethod
def add_logger_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
r""" Add callback_specific arguments to parser. """
parser.add_argument(
'--jsonboard_dir', type=str, required=False, default='jsonboard', help="Where to save logs."
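
A short sketch of registering the logger's CLI flag through the renamed hook; the import path is taken from the file location above, and `--jsonboard_dir` is the only flag visible in this diff.

```python
from argparse import ArgumentParser

from transformers_lightning.loggers.jsonboard_logger import JsonBoardLogger

parser = ArgumentParser()
JsonBoardLogger.add_argparse_args(parser)  # registers --jsonboard_dir
hyperparameters = parser.parse_args(['--jsonboard_dir', 'jsonboard'])
print(hyperparameters.jsonboard_dir)
```
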
4 changes: 2 additions & 2 deletions transformers_lightning/models/README.md
@@ -5,12 +5,12 @@ This package contains two high-level models that can be used to inherit some us

## TransformersModel

`TransformersModel` only overrides `configure_optimizers` by returning a better optimizer and the relative scheduler and finally provides a `add_model_specific_args` to automatically add the parameters of the optimizer to the global parser.
`TransformersModel` only overrides `configure_optimizers` by returning a better optimizer and the related scheduler, and finally provides an `add_argparse_args` method to automatically add the parameters of the optimizer to the global parser.

Example:
```python
>>> parser = ArgumentParser()
>>> TransformerModel.add_model_specific_args(parser)
>>> TransformerModel.add_argparse_args(parser)
>>> save_transformers_callback = callbacks.TransformersModelCheckpointCallback(hyperparameters)
>>> hyperparameters = parser.parse_args()
```
31 changes: 11 additions & 20 deletions transformers_lightning/models/transformers_model.py
@@ -2,7 +2,6 @@
from argparse import ArgumentParser, Namespace

from pytorch_lightning import LightningModule
from pytorch_lightning.strategies import DataParallelStrategy, DDP2Strategy
from pytorch_lightning.utilities.data import has_len
from pytorch_lightning.utilities.rank_zero import rank_zero_warn
from transformers.configuration_utils import PretrainedConfig
@@ -19,9 +18,7 @@


class TransformersModel(LightningModule):
r"""
`TransformersModel` add a ready-to-be-used optimizer and scheduler functions.
"""
r""" `TransformersModel` adds ready-to-be-used optimizer and scheduler functions. """

model: PreTrainedModel
tokenizer: PreTrainedTokenizerBase
@@ -31,12 +28,10 @@ class TransformersModel(LightningModule):
def __init__(self, hyperparameters):
super().__init__()
self.hyperparameters = hyperparameters
self.save_hyperparameters()
self.save_hyperparameters(hyperparameters)

def forward(self, *args, **kwargs):
r"""
Simply call the `model` attribute with the given args and kwargs
"""
r""" Simply call the `model` attribute with the given args and kwargs """
return self.model(*args, **kwargs)

def get_optimizer(self) -> SuperOptimizer:
@@ -50,7 +45,7 @@ def get_scheduler(self, optimizer) -> SuperScheduler:
return sched_class(self.hyperparameters, optimizer)

def num_training_steps(self) -> int:
r""" Total training steps inferred from datasets length, nodes and devices. """
r""" Total training steps inferred from datasets length, number of nodes and devices. """
if self.trainer.max_steps is not None and self.trainer.max_steps >= 0:
return self.trainer.max_steps

@@ -62,11 +57,7 @@ def num_training_steps(self) -> int:
train_samples = len(self.trainer.datamodule.train_dataset)

# number of training devices
if isinstance(self.trainer.strategy, (DataParallelStrategy, DDP2Strategy)):
total_devices = self.trainer.num_nodes
else:
total_devices = self.trainer.num_devices * self.trainer.num_nodes

total_devices = self.trainer.num_devices * self.trainer.num_nodes
rank_zero_warn(f"Number of training devices is {total_devices}")

# the number of training samples may be modified in distributed training
@@ -108,14 +99,14 @@ def configure_optimizers(self):
'optimizer': optimizer,
'lr_scheduler':
{
'scheduler': scheduler, # The LR schduler
'interval': self.hyperparameters.scheduler_interval, # The unit of the scheduler's step size
'frequency': self.hyperparameters.scheduler_frequency, # The frequency of the scheduler
'scheduler': scheduler, # The LR schduler
'interval': self.hyperparameters.scheduler_interval, # The unit of the scheduler's step size
'frequency': self.hyperparameters.scheduler_frequency, # The frequency of the scheduler
}
}

@staticmethod
def add_model_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
parser.add_argument('--optimizer_class', type=str, default='AdamWOptimizer', choices=all_optimizers.keys())
parser.add_argument(
'--scheduler_class', type=str, default='LinearSchedulerWithWarmup', choices=all_schedulers.keys()
@@ -131,5 +122,5 @@ def add_model_specific_args(parser: ArgumentParser):
sched_class = all_schedulers[tmp_params.scheduler_class]

# add optimizer and scheduler specific args
optim_class.add_optimizer_specific_args(parser)
sched_class.add_scheduler_specific_args(parser)
optim_class.add_argparse_args(parser)
sched_class.add_argparse_args(parser)
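
The simplified `num_training_steps` above always counts `num_devices * num_nodes` training devices now that DataParallel-style strategies are gone. The helper below is a standalone sketch of that arithmetic; the library's exact rounding may differ.

```python
import math


def estimate_training_steps(train_samples: int, batch_size: int, accumulate_grad_batches: int,
                            max_epochs: int, num_devices: int, num_nodes: int) -> int:
    # every device (on every node) processes its own shard of the training set
    total_devices = num_devices * num_nodes
    samples_per_device = math.ceil(train_samples / total_devices)

    batches_per_epoch = math.ceil(samples_per_device / batch_size)
    optimizer_steps_per_epoch = math.ceil(batches_per_epoch / accumulate_grad_batches)
    return optimizer_steps_per_epoch * max_epochs


# example: 10k samples, batch size 32, accumulation 4, 3 epochs on 2 devices of a single node
print(estimate_training_steps(10_000, 32, 4, 3, num_devices=2, num_nodes=1))
```
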
4 changes: 2 additions & 2 deletions transformers_lightning/optimizers/adamw.py
@@ -22,8 +22,8 @@ def __init__(self, hyperparameters: Namespace, named_parameters: Generator):
betas=hyperparameters.adam_betas
)

def add_optimizer_specific_args(parser: ArgumentParser):
super(AdamWOptimizer, AdamWOptimizer).add_optimizer_specific_args(parser)
def add_argparse_args(parser: ArgumentParser):
super(AdamWOptimizer, AdamWOptimizer).add_argparse_args(parser)
parser.add_argument('--learning_rate', type=float, default=1e-4)
parser.add_argument('--weight_decay', type=float, default=0.0)
parser.add_argument('--adam_epsilon', type=float, default=1e-8)
4 changes: 2 additions & 2 deletions transformers_lightning/optimizers/adamw_electra.py
@@ -103,8 +103,8 @@ def __init__(self, hyperparameters: Namespace, named_parameters: Generator):
amsgrad=hyperparameters.amsgrad
)

def add_optimizer_specific_args(parser: ArgumentParser):
super(ElectraAdamWOptimizer, ElectraAdamWOptimizer).add_optimizer_specific_args(parser)
def add_argparse_args(parser: ArgumentParser):
super(ElectraAdamWOptimizer, ElectraAdamWOptimizer).add_argparse_args(parser)
parser.add_argument('--learning_rate', type=float, default=1e-3)
parser.add_argument('--weight_decay', type=float, default=0.01)
parser.add_argument('--adam_epsilon', type=float, default=1e-6)
2 changes: 1 addition & 1 deletion transformers_lightning/optimizers/super_optimizer.py
@@ -12,5 +12,5 @@ def __init__(self, hyperparameters: Namespace, *args, **kwargs) -> None:
self.hyperparameters = hyperparameters

@staticmethod
def add_optimizer_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
r""" Add here the hyperparameters used by your optimizer. """
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from argparse import ArgumentParser, Namespace

import torch
from pytorch_lightning.utilities.warnings import rank_zero_warn
from pytorch_lightning.utilities.rank_zero import rank_zero_warn

from transformers_lightning.schedulers.super_scheduler import SuperScheduler

@@ -50,7 +50,7 @@ def get_lr(self):
return [base_lr * self.lr_lambda(self.last_epoch) for base_lr in self.base_lrs]

@staticmethod
def add_scheduler_specific_args(parser: ArgumentParser):
def add_argparse_args(parser: ArgumentParser):
r""" Add here the hyperparameters specific of the scheduler like the number of warmup steps. """
super(ConstantSchedulerWithWarmup, ConstantSchedulerWithWarmup).add_scheduler_specific_args(parser)
super(ConstantSchedulerWithWarmup, ConstantSchedulerWithWarmup).add_argparse_args(parser)
parser.add_argument('--num_warmup_steps', type=int, default=0)
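
`get_lr` above multiplies each base learning rate by `lr_lambda(last_epoch)`; the lambda itself is outside this diff, but for a constant schedule with warmup it typically looks like the sketch below (illustrative, not the library's exact implementation):

```python
def lr_lambda(current_step: int, num_warmup_steps: int) -> float:
    # linear ramp from 0 to 1 during warmup, then a constant factor of 1
    if current_step < num_warmup_steps:
        return float(current_step) / float(max(1, num_warmup_steps))
    return 1.0


# with --num_warmup_steps 100, step 50 yields half of the base learning rate
print(lr_lambda(50, num_warmup_steps=100))   # 0.5
print(lr_lambda(500, num_warmup_steps=100))  # 1.0
```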