Merge branch 'master' into device-enhance
lantiga authored Jan 13, 2025
2 parents dee68b1 + a944e77 commit 523c8dd
Showing 295 changed files with 792 additions and 565 deletions.
2 changes: 1 addition & 1 deletion .github/CONTRIBUTING.md
@@ -182,7 +182,7 @@ We welcome any useful contribution! For your convenience here's a recommended wo
 1. Use tags in PR name for the following cases:

    - **\[blocked by #<number>\]** if your work is dependent on other PRs.
-   - **\[wip\]** when you start to re-edit your work, mark it so no one will accidentally merge it in meantime.
+   - **[wip]** when you start to re-edit your work, mark it so no one will accidentally merge it in meantime.

 ### Question & Answer
36 changes: 18 additions & 18 deletions .github/checkgroup.yml
@@ -23,26 +23,26 @@ subprojects:
       - "pl-cpu (macOS-14, lightning, 3.10, 2.1)"
       - "pl-cpu (macOS-14, lightning, 3.11, 2.2.2)"
       - "pl-cpu (macOS-14, lightning, 3.11, 2.3)"
-      - "pl-cpu (macOS-14, lightning, 3.12, 2.4.1)"
-      - "pl-cpu (macOS-14, lightning, 3.12, 2.5.1)"
+      - "pl-cpu (macOS-14, lightning, 3.12.7, 2.4.1)"
+      - "pl-cpu (macOS-14, lightning, 3.12.7, 2.5.1)"
       - "pl-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)"
       - "pl-cpu (ubuntu-20.04, lightning, 3.10, 2.1)"
       - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)"
       - "pl-cpu (ubuntu-20.04, lightning, 3.11, 2.3)"
-      - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)"
-      - "pl-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)"
+      - "pl-cpu (ubuntu-22.04, lightning, 3.12.7, 2.4.1)"
+      - "pl-cpu (ubuntu-22.04, lightning, 3.12.7, 2.5.1)"
       - "pl-cpu (windows-2022, lightning, 3.9, 2.1, oldest)"
       - "pl-cpu (windows-2022, lightning, 3.10, 2.1)"
       - "pl-cpu (windows-2022, lightning, 3.11, 2.2.2)"
       - "pl-cpu (windows-2022, lightning, 3.11, 2.3)"
-      - "pl-cpu (windows-2022, lightning, 3.12, 2.4.1)"
-      - "pl-cpu (windows-2022, lightning, 3.12, 2.5.1)"
+      - "pl-cpu (windows-2022, lightning, 3.12.7, 2.4.1)"
+      - "pl-cpu (windows-2022, lightning, 3.12.7, 2.5.1)"
       - "pl-cpu (macOS-14, pytorch, 3.9, 2.1)"
       - "pl-cpu (ubuntu-20.04, pytorch, 3.9, 2.1)"
       - "pl-cpu (windows-2022, pytorch, 3.9, 2.1)"
-      - "pl-cpu (macOS-14, pytorch, 3.12, 2.5.1)"
-      - "pl-cpu (ubuntu-22.04, pytorch, 3.12, 2.5.1)"
-      - "pl-cpu (windows-2022, pytorch, 3.12, 2.5.1)"
+      - "pl-cpu (macOS-14, pytorch, 3.12.7, 2.5.1)"
+      - "pl-cpu (ubuntu-22.04, pytorch, 3.12.7, 2.5.1)"
+      - "pl-cpu (windows-2022, pytorch, 3.12.7, 2.5.1)"

   - id: "pytorch_lightning: Azure GPU"
     paths:
@@ -176,26 +176,26 @@ subprojects:
       - "fabric-cpu (macOS-14, lightning, 3.10, 2.1)"
       - "fabric-cpu (macOS-14, lightning, 3.11, 2.2.2)"
       - "fabric-cpu (macOS-14, lightning, 3.11, 2.3)"
-      - "fabric-cpu (macOS-14, lightning, 3.12, 2.4.1)"
-      - "fabric-cpu (macOS-14, lightning, 3.12, 2.5.1)"
+      - "fabric-cpu (macOS-14, lightning, 3.12.7, 2.4.1)"
+      - "fabric-cpu (macOS-14, lightning, 3.12.7, 2.5.1)"
       - "fabric-cpu (ubuntu-20.04, lightning, 3.9, 2.1, oldest)"
       - "fabric-cpu (ubuntu-20.04, lightning, 3.10, 2.1)"
       - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.2.2)"
       - "fabric-cpu (ubuntu-20.04, lightning, 3.11, 2.3)"
-      - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.4.1)"
-      - "fabric-cpu (ubuntu-22.04, lightning, 3.12, 2.5.1)"
+      - "fabric-cpu (ubuntu-22.04, lightning, 3.12.7, 2.4.1)"
+      - "fabric-cpu (ubuntu-22.04, lightning, 3.12.7, 2.5.1)"
       - "fabric-cpu (windows-2022, lightning, 3.9, 2.1, oldest)"
       - "fabric-cpu (windows-2022, lightning, 3.10, 2.1)"
       - "fabric-cpu (windows-2022, lightning, 3.11, 2.2.2)"
       - "fabric-cpu (windows-2022, lightning, 3.11, 2.3)"
-      - "fabric-cpu (windows-2022, lightning, 3.12, 2.4.1)"
-      - "fabric-cpu (windows-2022, lightning, 3.12, 2.5.1)"
+      - "fabric-cpu (windows-2022, lightning, 3.12.7, 2.4.1)"
+      - "fabric-cpu (windows-2022, lightning, 3.12.7, 2.5.1)"
       - "fabric-cpu (macOS-14, fabric, 3.9, 2.1)"
       - "fabric-cpu (ubuntu-20.04, fabric, 3.9, 2.1)"
       - "fabric-cpu (windows-2022, fabric, 3.9, 2.1)"
-      - "fabric-cpu (macOS-14, fabric, 3.12, 2.5.1)"
-      - "fabric-cpu (ubuntu-22.04, fabric, 3.12, 2.5.1)"
-      - "fabric-cpu (windows-2022, fabric, 3.12, 2.5.1)"
+      - "fabric-cpu (macOS-14, fabric, 3.12.7, 2.5.1)"
+      - "fabric-cpu (ubuntu-22.04, fabric, 3.12.7, 2.5.1)"
+      - "fabric-cpu (windows-2022, fabric, 3.12.7, 2.5.1)"

   - id: "lightning_fabric: Azure GPU"
     paths:
2 changes: 1 addition & 1 deletion .github/workflows/README.md
@@ -16,7 +16,7 @@ Brief description of all our automation tools used for boosting development perf
 | .azure-pipelines/gpu-benchmarks.yml | Run speed/memory benchmarks for parity with vanila PyTorch. | GPU |
 | .github/workflows/ci-flagship-apps.yml | Run end-2-end tests with full applications, including deployment to the production cloud. | CPU |
 | .github/workflows/ci-tests-pytorch.yml | Run all tests except for accelerator-specific, standalone and slow tests. | CPU |
-| .github/workflows/tpu-tests.yml | Run only TPU-specific tests. Requires that the PR title contains '\[TPU\]' | TPU |
+| .github/workflows/tpu-tests.yml | Run only TPU-specific tests. Requires that the PR title contains '[TPU]' | TPU |

 \* Each standalone test needs to be run in separate processes to avoid unwanted interactions between test cases.
23 changes: 15 additions & 8 deletions .github/workflows/docs-build.yml
@@ -174,6 +174,21 @@ jobs:
         with:
           project_id: ${{ secrets.GCS_PROJECT }}

+      # Uploading docs as archive to GCS, so they can be as backup
+      - name: Upload docs as archive to GCS 🪣
+        if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
+        working-directory: docs/build
+        run: |
+          zip ${{ env.VERSION }}.zip -r html/
+          gsutil cp ${{ env.VERSION }}.zip ${GCP_TARGET}
+
+      - name: Inject version selector
+        working-directory: docs/build
+        run: |
+          pip install -q wget
+          python -m wget https://raw.githubusercontent.com/Lightning-AI/utilities/main/scripts/inject-selector-script.py
+          python inject-selector-script.py html ${{ matrix.pkg-name }}
+
       # Uploading docs to GCS, so they can be served on lightning.ai
       - name: Upload docs/${{ matrix.pkg-name }}/stable to GCS 🪣
         if: startsWith(github.ref, 'refs/heads/release/') && github.event_name == 'push'
@@ -188,11 +203,3 @@ jobs:
       - name: Upload docs/${{ matrix.pkg-name }}/release to GCS 🪣
         if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
         run: gsutil -m rsync -d -R docs/build/html/ ${GCP_TARGET}/${{ env.VERSION }}
-
-      # Uploading docs as archive to GCS, so they can be as backup
-      - name: Upload docs as archive to GCS 🪣
-        if: startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch'
-        working-directory: docs/build
-        run: |
-          zip ${{ env.VERSION }}.zip -r html/
-          gsutil cp ${{ env.VERSION }}.zip ${GCP_TARGET}
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
@@ -23,7 +23,7 @@ ci:

 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: end-of-file-fixer
       - id: trailing-whitespace
@@ -65,12 +65,12 @@ repos:
         args: ["--in-place"]

   - repo: https://github.com/sphinx-contrib/sphinx-lint
-    rev: v0.9.1
+    rev: v1.0.0
     hooks:
       - id: sphinx-lint

   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.5.0
+    rev: v0.8.6
     hooks:
       # try to fix what is possible
       - id: ruff
@@ -81,7 +81,7 @@ repos:
       - id: ruff

   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.17
+    rev: 0.7.21
     hooks:
       - id: mdformat
         additional_dependencies:
85 changes: 64 additions & 21 deletions docs/source-pytorch/common/tbptt.rst
@@ -12,48 +12,91 @@ hidden states should be kept in-between each time-dimension split.

 .. code-block:: python

     import torch
+    import torch.nn as nn
+    import torch.nn.functional as F
     import torch.optim as optim
-    import pytorch_lightning as pl
-    from pytorch_lightning import LightningModule
+    from torch.utils.data import Dataset, DataLoader

-    class LitModel(LightningModule):
+    import lightning as L
+
+
+    class AverageDataset(Dataset):
+        def __init__(self, dataset_len=300, sequence_len=100):
+            self.dataset_len = dataset_len
+            self.sequence_len = sequence_len
+            self.input_seq = torch.randn(dataset_len, sequence_len, 10)
+            top, bottom = self.input_seq.chunk(2, -1)
+            self.output_seq = top + bottom.roll(shifts=1, dims=-1)
+
+        def __len__(self):
+            return self.dataset_len
+
+        def __getitem__(self, item):
+            return self.input_seq[item], self.output_seq[item]
+
+
+    class LitModel(L.LightningModule):
         def __init__(self):
             super().__init__()

+            self.batch_size = 10
+            self.in_features = 10
+            self.out_features = 5
+            self.hidden_dim = 20
+
             # 1. Switch to manual optimization
             self.automatic_optimization = False
             self.truncated_bptt_steps = 10
-            self.my_rnn = ParityModuleRNN()  # Define RNN model using ParityModuleRNN
+
+            self.rnn = nn.LSTM(self.in_features, self.hidden_dim, batch_first=True)
+            self.linear_out = nn.Linear(in_features=self.hidden_dim, out_features=self.out_features)
+
+        def forward(self, x, hs):
+            seq, hs = self.rnn(x, hs)
+            return self.linear_out(seq), hs

         # 2. Remove the `hiddens` argument
         def training_step(self, batch, batch_idx):
             # 3. Split the batch in chunks along the time dimension
-            split_batches = split_batch(batch, self.truncated_bptt_steps)
-
-            batch_size = 10
-            hidden_dim = 20
-            hiddens = torch.zeros(1, batch_size, hidden_dim, device=self.device)
-            for split_batch in range(split_batches):
-                # 4. Perform the optimization in a loop
-                loss, hiddens = self.my_rnn(split_batch, hiddens)
-                self.backward(loss)
-                self.optimizer.step()
-                self.optimizer.zero_grad()
+            x, y = batch
+            split_x, split_y = [
+                x.tensor_split(self.truncated_bptt_steps, dim=1),
+                y.tensor_split(self.truncated_bptt_steps, dim=1)
+            ]
+
+            hiddens = None
+            optimizer = self.optimizers()
+            losses = []
+
+            # 4. Perform the optimization in a loop
+            for x, y in zip(split_x, split_y):
+                y_pred, hiddens = self(x, hiddens)
+                loss = F.mse_loss(y_pred, y)
+
+                optimizer.zero_grad()
+                self.manual_backward(loss)
+                optimizer.step()

                 # 5. "Truncate"
-                hiddens = hiddens.detach()
+                hiddens = [h.detach() for h in hiddens]
+                losses.append(loss.detach())
+
+            avg_loss = sum(losses) / len(losses)
+            self.log("train_loss", avg_loss, prog_bar=True)

             # 6. Remove the return of `hiddens`
             # Returning loss in manual optimization is not needed
             return None

         def configure_optimizers(self):
-            return optim.Adam(self.my_rnn.parameters(), lr=0.001)
+            return optim.Adam(self.parameters(), lr=0.001)
+
+        def train_dataloader(self):
+            return DataLoader(AverageDataset(), batch_size=self.batch_size)
+

+    if __name__ == "__main__":
         model = LitModel()
-        trainer = pl.Trainer(max_epochs=5)
-        trainer.fit(model, train_dataloader)  # Define your own dataloader
+        trainer = L.Trainer(max_epochs=5)
+        trainer.fit(model)
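The updated example above splits each batch along the time dimension with `tensor_split` and detaches the hidden state between segments. A minimal standalone sketch of that chunk-and-detach pattern (shapes and hyperparameters here are illustrative, not taken from the docs):

```python
import torch
import torch.nn as nn

torch.manual_seed(0)
rnn = nn.LSTM(input_size=10, hidden_size=20, batch_first=True)

x = torch.randn(4, 100, 10)           # (batch, time, features)
segments = x.tensor_split(10, dim=1)  # 10 truncated-BPTT segments of 10 steps each

hiddens = None
for seg in segments:
    out, hiddens = rnn(seg, hiddens)
    # "truncate": stop gradients from flowing into earlier segments
    hiddens = tuple(h.detach() for h in hiddens)
```

Detaching `(h, c)` after each segment is what bounds the backward pass to `truncated_bptt_steps` time steps instead of the full sequence.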
4 changes: 2 additions & 2 deletions docs/source-pytorch/tuning/profiler_intermediate.rst
@@ -55,7 +55,7 @@ The profiler will generate an output like this:

     Self CPU time total: 1.681ms

 .. note::
-    When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time.
+    When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time.
     This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously.
     It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use
     the ``SimpleProfiler``.
@@ -142,7 +142,7 @@ This profiler will record ``training_step``, ``validation_step``, ``test_step``,

 The output above shows the profiling for the action ``training_step``.

 .. note::
-    When using the PyTorch Profiler, wall clock time will not not be representative of the true wall clock time.
+    When using the PyTorch Profiler, wall clock time will not be representative of the true wall clock time.
     This is due to forcing profiled operations to be measured synchronously, when many CUDA ops happen asynchronously.
     It is recommended to use this Profiler to find bottlenecks/breakdowns, however for end to end wall clock time use
     the ``SimpleProfiler``.
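The corrected notes above steer end-to-end timing toward ``SimpleProfiler`` because the PyTorch profiler forces synchronous measurement. A small sketch of the underlying ``torch.profiler`` API that these docs wrap, producing a per-op table like the one referenced (CPU-only so it runs anywhere; the tensor sizes are arbitrary):

```python
import torch
from torch.profiler import ProfilerActivity, profile

# profile a trivial workload and print a per-op breakdown
with profile(activities=[ProfilerActivity.CPU]) as prof:
    a = torch.randn(256, 256)
    b = a @ a  # matmul shows up as an aten op in the table

table = prof.key_averages().table(sort_by="cpu_time_total", row_limit=5)
print(table)
```

The table ends with a "Self CPU time total" line, which is the figure quoted in the docs output above.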
3 changes: 2 additions & 1 deletion examples/fabric/build_your_own_trainer/run.py
@@ -1,8 +1,9 @@
-import lightning as L
 import torch
 from torchmetrics.functional.classification.accuracy import accuracy
 from trainer import MyCustomTrainer

+import lightning as L
+

 class MNISTModule(L.LightningModule):
     def __init__(self) -> None:
7 changes: 4 additions & 3 deletions examples/fabric/build_your_own_trainer/trainer.py
@@ -3,15 +3,16 @@
 from functools import partial
 from typing import Any, Literal, Optional, Union, cast

-import lightning as L
 import torch
+from lightning_utilities import apply_to_collection
+from tqdm import tqdm
+
+import lightning as L
 from lightning.fabric.accelerators import Accelerator
 from lightning.fabric.loggers import Logger
 from lightning.fabric.strategies import Strategy
 from lightning.fabric.wrappers import _unwrap_objects
 from lightning.pytorch.utilities.model_helpers import is_overridden
-from lightning_utilities import apply_to_collection
-from tqdm import tqdm


 class MyCustomTrainer:
3 changes: 2 additions & 1 deletion examples/fabric/dcgan/train_fabric.py
@@ -16,9 +16,10 @@
 import torch.utils.data
 import torchvision.transforms as transforms
 import torchvision.utils
-from lightning.fabric import Fabric, seed_everything
 from torchvision.datasets import CelebA

+from lightning.fabric import Fabric, seed_everything
+
 # Root directory for dataset
 dataroot = "data/"
 # Number of workers for dataloader
7 changes: 4 additions & 3 deletions examples/fabric/fp8_distributed_transformer/train.py
@@ -1,15 +1,16 @@
-import lightning as L
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from lightning.fabric.strategies import ModelParallelStrategy
-from lightning.pytorch.demos import Transformer, WikiText2
 from torch.distributed._composable.fsdp.fully_shard import fully_shard
 from torch.distributed.device_mesh import DeviceMesh
 from torch.utils.data import DataLoader
 from torchao.float8 import Float8LinearConfig, convert_to_float8_training
 from tqdm import tqdm

+import lightning as L
+from lightning.fabric.strategies import ModelParallelStrategy
+from lightning.pytorch.demos import Transformer, WikiText2
+

 def configure_model(model: nn.Module, device_mesh: DeviceMesh) -> nn.Module:
     float8_config = Float8LinearConfig(
3 changes: 2 additions & 1 deletion examples/fabric/image_classifier/train_fabric.py
@@ -36,11 +36,12 @@
 import torch.nn.functional as F
 import torch.optim as optim
 import torchvision.transforms as T
-from lightning.fabric import Fabric, seed_everything
 from torch.optim.lr_scheduler import StepLR
 from torchmetrics.classification import Accuracy
 from torchvision.datasets import MNIST

+from lightning.fabric import Fabric, seed_everything
+
 DATASETS_PATH = path.join(path.dirname(__file__), "..", "..", "..", "Datasets")
3 changes: 2 additions & 1 deletion examples/fabric/kfold_cv/train_fabric.py
@@ -20,12 +20,13 @@
 import torch.nn.functional as F
 import torch.optim as optim
 import torchvision.transforms as T
-from lightning.fabric import Fabric, seed_everything
 from sklearn import model_selection
 from torch.utils.data import DataLoader, SubsetRandomSampler
 from torchmetrics.classification import Accuracy
 from torchvision.datasets import MNIST

+from lightning.fabric import Fabric, seed_everything
+
 DATASETS_PATH = path.join(path.dirname(__file__), "..", "..", "..", "Datasets")
5 changes: 3 additions & 2 deletions examples/fabric/language_model/train.py
@@ -1,9 +1,10 @@
-import lightning as L
 import torch
 import torch.nn.functional as F
-from lightning.pytorch.demos import Transformer, WikiText2
 from torch.utils.data import DataLoader, random_split

+import lightning as L
+from lightning.pytorch.demos import Transformer, WikiText2
+

 def main():
     L.seed_everything(42)
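The example-file changes above consistently move `lightning` imports out of the third-party block into their own group after a blank line. This is the ordering an isort-style formatter produces when `lightning` is treated as first-party; a hypothetical configuration that would yield it (illustrative only, not the repo's actual config):

```toml
# pyproject.toml (hypothetical sketch)
[tool.ruff.lint.isort]
known-first-party = ["lightning"]
```

With this setting, `import lightning as L` and `from lightning.fabric import ...` sort into the first-party section below `torch`, `tqdm`, and other third-party imports, matching every diff in this group.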