From 74009cea97e1f35725ab693526f5ad77139b068b Mon Sep 17 00:00:00 2001 From: TorchX CI Runner Date: Mon, 23 Sep 2024 15:57:51 +0000 Subject: [PATCH] [doc_push][main] built from 94ac896 (main). Redirects: main -> 0.8.0dev0. --- .../examples_apps_python.zip | Bin 31138 -> 31138 bytes .../examples_apps_jupyter.zip | Bin 39578 -> 39578 bytes .../examples_pipelines_python.zip | Bin 13790 -> 13790 bytes .../examples_pipelines_jupyter.zip | Bin 21485 -> 21485 bytes 0.8.0dev0/custom_components.html | 297 +++++------ 0.8.0dev0/custom_components.ipynb | 483 +++++++----------- 0.8.0dev0/pipelines/airflow.html | 36 +- 0.8.0dev0/pipelines/airflow.ipynb | 76 +-- 0.8.0dev0/quickstart.html | 222 ++++---- 0.8.0dev0/quickstart.ipynb | 398 +++++++-------- 0.8.0dev0/searchindex.js | 2 +- 11 files changed, 705 insertions(+), 809 deletions(-) diff --git a/0.8.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip b/0.8.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip index b469b97b31850f830f343d3d9c6d5e37e5af6001..255a0ebb4704e09ea2c931c89f72adc152b04c9d 100644 GIT binary patch delta 133 zcmZ4VnQ_r)M&1B#W)=|!5b&-u-^ja=gBeI~-pC;&$m~;RzWA7=1c=^z(r`69m{D%| zNfOM+3En9JW@u#0;s>e1k=%#?f?cR BE64x< delta 133 zcmZ4VnQ_r)M&1B#W)=|!5O}uBa3k+R4(8{(3^#A&5E2A279W$801KQnT+I$F7##0JfObigbx04>uk Aga7~l delta 134 zcmbQWm1)*iCf)#VW)=|!5O}uBa3gO55A*X~hMNm`0z|=#!-{idz>H+O1a2_nnCBry zFrz!$MiR_8)pA>y8K`^miWwdtfytktwB1YxFu!r8GnhU;(;iH7&T;|M0kgaSdZIta&R*P>CF<{N{V2Ht;nLLLleRP delta 66 zcmaF6obl~)M&1B#W)=|!5O}`Ja3imqATyBO945HK2+R<4`Ns!hOcn}u2GbG2ZeV(0 GunzzMi5RE= diff --git a/0.8.0dev0/custom_components.html b/0.8.0dev0/custom_components.html index 4c33019db..b151bd86e 100644 --- a/0.8.0dev0/custom_components.html +++ b/0.8.0dev0/custom_components.html @@ -538,12 +538,12 @@

Hello World
-torchx 2024-09-17 23:23:18 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:23:18 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-torchx 2024-09-17 23:23:18 INFO     Log directory is: /tmp/torchx_pqo87ixa
-torchx 2024-09-17 23:23:18 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:50:26 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:50:26 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+torchx 2024-09-23 15:50:26 INFO     Log directory is: /tmp/torchx_3pxeqfza
+torchx 2024-09-23 15:50:26 INFO     Waiting for the app to finish...
 greeter/0 Hello, your name!
-torchx 2024-09-17 23:23:19 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:50:27 INFO     Job finished: SUCCEEDED
 

If we want to run in other environments, we can build a Docker container so we can run our component in Docker enabled environments such as Kubernetes or via the local Docker scheduler.

@@ -595,11 +595,11 @@

Hello World
-torchx 2024-09-17 23:25:16 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:25:16 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-09-17 23:25:16 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-09-17 23:25:16 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-09-17 23:25:17 WARNING  failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found ("pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied")
-torchx 2024-09-17 23:25:17 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-09-17 23:25:17 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-09-17 23:25:17 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-09-17 23:25:17 INFO      ---> 9d9fa455e078
-torchx 2024-09-17 23:25:17 INFO     Step 3/4 : COPY . .
-torchx 2024-09-17 23:25:21 INFO      ---> 5e2bee314bac
-torchx 2024-09-17 23:25:21 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-09-17 23:25:21 INFO      ---> Running in 171d9d56d726
-torchx 2024-09-17 23:25:25 INFO      ---> Removed intermediate container 171d9d56d726
-torchx 2024-09-17 23:25:25 INFO      ---> a3f23864b182
-torchx 2024-09-17 23:25:25 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-09-17 23:25:25 INFO     Successfully built a3f23864b182
-torchx 2024-09-17 23:25:25 INFO     Built new image `sha256:a3f23864b1828796d32cfd7bf208ed0f7fb0d40161c4f9d40353a9b3dc308eaa` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.
-torchx 2024-09-17 23:25:26 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:52:13 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:52:13 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-09-23 15:52:13 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-09-23 15:52:13 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-09-23 15:52:13 WARNING  failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found ("pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied")
+torchx 2024-09-23 15:52:13 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-09-23 15:52:13 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-09-23 15:52:13 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-09-23 15:52:13 INFO      ---> 52f92f1ea889
+torchx 2024-09-23 15:52:13 INFO     Step 3/4 : COPY . .
+torchx 2024-09-23 15:52:17 INFO      ---> e41cf3989ab3
+torchx 2024-09-23 15:52:17 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-09-23 15:52:17 INFO      ---> Running in a02128ae8d20
+torchx 2024-09-23 15:52:21 INFO      ---> Removed intermediate container a02128ae8d20
+torchx 2024-09-23 15:52:21 INFO      ---> df927abef23e
+torchx 2024-09-23 15:52:21 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-09-23 15:52:21 INFO     Successfully built df927abef23e
+torchx 2024-09-23 15:52:21 INFO     Built new image `sha256:df927abef23e3c2c2e3fe0f1968dac429f9c27bda68c1c5cc0db55e084070de4` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.
+torchx 2024-09-23 15:52:22 INFO     Waiting for the app to finish...
 greeter/0 Hello, your name!
-torchx 2024-09-17 23:25:27 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:52:23 INFO     Job finished: SUCCEEDED
 

If you have a Kubernetes cluster you can use the Kubernetes scheduler to launch this on the cluster instead.

@@ -846,25 +833,25 @@

Builtins
-torchx 2024-09-17 23:25:30 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:25:30 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-09-17 23:25:30 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-09-17 23:25:30 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-09-17 23:28:29 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-09-17 23:28:29 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-09-17 23:28:29 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-09-17 23:28:29 INFO      ---> c31c993343e6
-torchx 2024-09-17 23:28:29 INFO     Step 3/4 : COPY . .
-torchx 2024-09-17 23:28:36 INFO      ---> daf1a773479e
-torchx 2024-09-17 23:28:36 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-09-17 23:28:36 INFO      ---> Running in d48689ba95d6
-torchx 2024-09-17 23:28:43 INFO      ---> Removed intermediate container d48689ba95d6
-torchx 2024-09-17 23:28:43 INFO      ---> 8067f3439367
-torchx 2024-09-17 23:28:43 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-09-17 23:28:43 INFO     Successfully built 8067f3439367
-torchx 2024-09-17 23:28:43 INFO     Built new image `sha256:8067f34393673970175578c1b5189f3ca20981976885cd29c5c486536d2d26ef` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.
-torchx 2024-09-17 23:28:43 INFO     Waiting for the app to finish...
-torchx 2024-09-17 23:28:43 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:52:25 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:52:25 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-09-23 15:52:25 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-09-23 15:52:25 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-09-23 15:54:46 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-09-23 15:54:46 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-09-23 15:54:46 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-09-23 15:54:46 INFO      ---> 3ad6a395fc8b
+torchx 2024-09-23 15:54:46 INFO     Step 3/4 : COPY . .
+torchx 2024-09-23 15:54:53 INFO      ---> 3cd7205d8e0d
+torchx 2024-09-23 15:54:53 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-09-23 15:54:53 INFO      ---> Running in e6f5c28b7488
+torchx 2024-09-23 15:55:01 INFO      ---> Removed intermediate container e6f5c28b7488
+torchx 2024-09-23 15:55:01 INFO      ---> b98ce4756599
+torchx 2024-09-23 15:55:01 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-09-23 15:55:01 INFO     Successfully built b98ce4756599
+torchx 2024-09-23 15:55:01 INFO     Built new image `sha256:b98ce4756599a7543c6edd010ceea97dad6110805187418250d643814c43de0a` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.
+torchx 2024-09-23 15:55:01 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:55:01 INFO     Job finished: SUCCEEDED
 echo/0 Hello :)
 
@@ -873,7 +860,7 @@

Builtins
-local_docker://torchx/echo-m2w4xscjv1cqj
+local_docker://torchx/echo-twjnz522z350w
 
diff --git a/0.8.0dev0/custom_components.ipynb b/0.8.0dev0/custom_components.ipynb index cd8a17188..d769df02f 100644 --- a/0.8.0dev0/custom_components.ipynb +++ b/0.8.0dev0/custom_components.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "8208e3b3", + "id": "e9405cb2", "metadata": {}, "source": [ "# Custom Components\n", @@ -27,13 +27,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "e6c0809f", + "id": "f01fcea8", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:23:17.727175Z", - "iopub.status.busy": "2024-09-17T23:23:17.726955Z", - "iopub.status.idle": "2024-09-17T23:23:17.734642Z", - "shell.execute_reply": "2024-09-17T23:23:17.734085Z" + "iopub.execute_input": "2024-09-23T15:50:26.470800Z", + "iopub.status.busy": "2024-09-23T15:50:26.470601Z", + "iopub.status.idle": "2024-09-23T15:50:26.478089Z", + "shell.execute_reply": "2024-09-23T15:50:26.477448Z" } }, "outputs": [ @@ -71,7 +71,7 @@ }, { "cell_type": "markdown", - "id": "5b0445ba", + "id": "7b2e1074", "metadata": {}, "source": [ "Now that we have an app we can write the component file for it. This\n", @@ -84,13 +84,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "aa94a805", + "id": "1fc9303f", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:23:17.736728Z", - "iopub.status.busy": "2024-09-17T23:23:17.736350Z", - "iopub.status.idle": "2024-09-17T23:23:17.739973Z", - "shell.execute_reply": "2024-09-17T23:23:17.739463Z" + "iopub.execute_input": "2024-09-23T15:50:26.480292Z", + "iopub.status.busy": "2024-09-23T15:50:26.479731Z", + "iopub.status.idle": "2024-09-23T15:50:26.483520Z", + "shell.execute_reply": "2024-09-23T15:50:26.482995Z" } }, "outputs": [ @@ -126,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "eb99b079", + "id": "908a3ddb", "metadata": {}, "source": [ "We can execute our component via `torchx run`. The\n", @@ -136,13 +136,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "9ae740b2", + "id": "5db3ed12", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:23:17.742086Z", - "iopub.status.busy": "2024-09-17T23:23:17.741628Z", - "iopub.status.idle": "2024-09-17T23:23:19.139346Z", - "shell.execute_reply": "2024-09-17T23:23:19.138667Z" + "iopub.execute_input": "2024-09-23T15:50:26.485418Z", + "iopub.status.busy": "2024-09-23T15:50:26.485212Z", + "iopub.status.idle": "2024-09-23T15:50:27.879212Z", + "shell.execute_reply": "2024-09-23T15:50:27.878545Z" } }, "outputs": [ @@ -150,28 +150,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:23:18 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:50:26 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:23:18 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-09-23 15:50:26 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:23:18 INFO Log directory is: /tmp/torchx_pqo87ixa\n" + "torchx 2024-09-23 15:50:26 INFO Log directory is: /tmp/torchx_3pxeqfza\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:23:18 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:50:26 INFO Waiting for the app to finish...\n" ] }, { @@ -185,14 +185,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:23:19 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:50:27 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/hello_world-tv443npdvbthz\n" + "local_cwd://torchx/hello_world-t612tjqcg72tm\n" ] } ], @@ -203,7 +203,7 @@ }, { "cell_type": "markdown", - "id": "2998dab9", + "id": "e36c7d90", "metadata": {}, "source": [ "If we want to run in other environments, we can build a Docker container so we\n", @@ -221,13 +221,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "099bad88", + "id": "a63b2677", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:23:19.141615Z", - "iopub.status.busy": "2024-09-17T23:23:19.141354Z", - "iopub.status.idle": "2024-09-17T23:23:19.145580Z", - "shell.execute_reply": "2024-09-17T23:23:19.144953Z" + "iopub.execute_input": "2024-09-23T15:50:27.881809Z", + "iopub.status.busy": "2024-09-23T15:50:27.881382Z", + "iopub.status.idle": "2024-09-23T15:50:27.885771Z", + "shell.execute_reply": "2024-09-23T15:50:27.885140Z" } }, "outputs": [ @@ -249,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "fd3edba2", + "id": "045e60bf", "metadata": {}, "source": [ "Once we have the Dockerfile created we can create our docker image." @@ -258,13 +258,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "6f2241ae", + "id": "4a2659f0", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:23:19.147901Z", - "iopub.status.busy": "2024-09-17T23:23:19.147404Z", - "iopub.status.idle": "2024-09-17T23:25:15.813035Z", - "shell.execute_reply": "2024-09-17T23:25:15.812394Z" + "iopub.execute_input": "2024-09-23T15:50:27.888030Z", + "iopub.status.busy": "2024-09-23T15:50:27.887574Z", + "iopub.status.idle": "2024-09-23T15:52:12.718599Z", + "shell.execute_reply": "2024-09-23T15:52:12.717745Z" } }, "outputs": [ @@ -293,7 +293,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#1 transferring dockerfile: 99B done\n" + "#1 transferring dockerfile: 99B 0.0s done\n" ] }, { @@ -321,7 +321,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#2 DONE 0.7s\n" + "#2 DONE 0.4s\n" ] }, { @@ -405,14 +405,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c 7.21kB / 7.21kB done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 0B / 189B 0.1s\n" + "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.1s done\n" ] }, { @@ -426,672 +419,602 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 7.34MB / 26.70MB 0.3s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.2s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0B / 2.00GB 0.3s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.2s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 20.97MB / 26.70MB 0.5s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 7.34MB / 9.94MB 0.5s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.6s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.5s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0B / 21.46MB 0.6s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 0B / 132B 0.6s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca\n" + "#5 sha256:3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c 7.21kB / 7.21kB done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.7s done\n" + "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0B / 9.94MB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 0B / 257B 0.7s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0B / 26.70MB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 0.9s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0B / 2.00GB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 0B / 1.71GB 0.9s\n" + "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 3.15MB / 21.46MB 1.0s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 12.58MB / 26.70MB 0.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 10.49MB / 21.46MB 1.1s\n" + "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.3s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 17.83MB / 21.46MB 1.2s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 20.97MB / 26.70MB 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 1.2s done\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 0B / 132B 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 0B / 92B 1.4s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0.9s done\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 1.4s done\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 1.5s\n" + "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 0B / 257B 0.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0B / 21.46MB 0.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 1.6s done\n" + "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 0.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 done\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 6.91MB / 21.46MB 0.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 1.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 0B / 1.71GB 0.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203\n" + "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 0B / 92B 0.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 89.13MB / 1.71GB 2.2s\n" + "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 0.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 20.97MB / 341.29MB 2.2s\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 0.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 110.10MB / 2.00GB 2.6s\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 0.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 45.09MB / 341.29MB 2.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 0.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.7s done\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 18.80MB / 341.29MB 1.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 63.96MB / 341.29MB 3.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 100.66MB / 2.00GB 1.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 175.11MB / 1.71GB 3.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 91.13MB / 1.71GB 1.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 83.89MB / 341.29MB 3.4s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 54.53MB / 341.29MB 1.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 109.05MB / 341.29MB 3.8s\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 1.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 126.88MB / 341.29MB 4.2s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 75.50MB / 341.29MB 1.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 211.81MB / 2.00GB 4.5s\n" + "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 268.44MB / 1.71GB 4.5s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 97.52MB / 341.29MB 1.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 144.70MB / 341.29MB 4.8s\n" + "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 163.58MB / 341.29MB 5.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 188.74MB / 1.71GB 1.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 354.42MB / 1.71GB 5.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 130.02MB / 341.29MB 1.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 187.70MB / 341.29MB 5.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 180.36MB / 341.29MB 2.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 205.52MB / 341.29MB 5.9s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 285.21MB / 1.71GB 2.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 227.54MB / 341.29MB 6.3s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 206.57MB / 341.29MB 2.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 324.01MB / 2.00GB 6.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 231.74MB / 341.29MB 2.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 246.42MB / 341.29MB 6.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 272.63MB / 341.29MB 2.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 447.74MB / 1.71GB 6.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 208.67MB / 2.00GB 2.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 265.29MB / 341.29MB 6.9s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 373.29MB / 1.71GB 3.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 284.16MB / 341.29MB 7.2s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 301.99MB / 341.29MB 3.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 303.04MB / 341.29MB 7.5s\n" + "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 533.73MB / 1.71GB 7.9s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 327.16MB / 341.29MB 3.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 324.01MB / 341.29MB 7.9s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 477.10MB / 1.71GB 3.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 424.67MB / 2.00GB 8.4s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 3.8s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 8.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 320.86MB / 2.00GB 4.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 8.7s done\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 8.7s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 4.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 8.8s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 570.43MB / 1.71GB 4.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 0B / 556.96kB 8.9s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 4.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 627.05MB / 1.71GB 9.1s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 4.3s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 9.2s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 658.51MB / 1.71GB 4.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 528.48MB / 2.00GB 10.1s\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 714.08MB / 1.71GB 10.2s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 426.72MB / 2.00GB 5.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 804.26MB / 1.71GB 11.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 775.95MB / 1.71GB 5.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 631.24MB / 2.00GB 12.3s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 529.53MB / 2.00GB 6.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 899.68MB / 1.71GB 12.6s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 870.32MB / 1.71GB 6.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.00GB / 1.71GB 13.9s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 965.74MB / 1.71GB 6.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 736.10MB / 2.00GB 14.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 642.78MB / 2.00GB 7.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.10GB / 1.71GB 15.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.06GB / 1.71GB 7.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 836.76MB / 2.00GB 16.3s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 763.36MB / 2.00GB 7.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.19GB / 1.71GB 16.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.15GB / 1.71GB 8.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.27GB / 1.71GB 17.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 878.71MB / 2.00GB 8.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 938.48MB / 2.00GB 18.1s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.27GB / 1.71GB 8.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.37GB / 1.71GB 18.9s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.36GB / 1.71GB 9.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.04GB / 2.00GB 19.7s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 989.86MB / 2.00GB 9.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.46GB / 1.71GB 20.0s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.48GB / 1.71GB 9.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.55GB / 1.71GB 21.1s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.58GB / 1.71GB 10.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.14GB / 2.00GB 21.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.10GB / 2.00GB 10.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.63GB / 1.71GB 22.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.67GB / 1.71GB 10.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.25GB / 2.00GB 23.3s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.20GB / 2.00GB 11.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 23.3s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.32GB / 2.00GB 11.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.36GB / 2.00GB 24.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.42GB / 2.00GB 12.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.46GB / 2.00GB 26.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 13.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.56GB / 2.00GB 28.6s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.53GB / 2.00GB 13.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.67GB / 2.00GB 30.5s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.64GB / 2.00GB 14.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.77GB / 2.00GB 32.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.75GB / 2.00GB 15.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.87GB / 2.00GB 33.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.85GB / 2.00GB 16.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.98GB / 2.00GB 36.2s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.97GB / 2.00GB 17.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 38.4s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 19.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0.1s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 5.2s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 5.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 10.3s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 10.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 15.4s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 15.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 20.4s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 20.3s\n" ] }, { @@ -1105,7 +1028,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.0s done\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.1s done\n" ] }, { @@ -1133,7 +1056,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.7s done\n" + "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.8s done\n" ] }, { @@ -1175,14 +1098,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 15.3s\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 15.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 17.6s done\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 18.7s done\n" ] }, { @@ -1199,13 +1122,6 @@ "#5 extracting sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 done\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 extracting sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -1217,21 +1133,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 5.1s\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 5.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 10.2s\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 10.3s\n" ] }, { @@ -1252,14 +1168,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0.1s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 extracting sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968\n" + "#5 extracting sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0.0s done\n" ] }, { @@ -1273,7 +1182,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 DONE 107.7s\n" + "#5 DONE 88.7s\n" ] }, { @@ -1329,7 +1238,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#7 writing image sha256:9d9fa455e0781fdf3c31a167be389dcca28ff3963c4bc64aa9e0dd5f9b73326c done\n" + "#7 writing image sha256:52f92f1ea8896a4af1a2d7d38d453472fadfd957266638a6a697d9be0c1eb796 done\n" ] }, { @@ -1354,7 +1263,7 @@ }, { "cell_type": "markdown", - "id": "3e979258", + "id": "5a0acc7f", "metadata": {}, "source": [ "We can then launch it on the local scheduler." @@ -1363,13 +1272,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "28b1ac8f", + "id": "f692f77f", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:25:15.815243Z", - "iopub.status.busy": "2024-09-17T23:25:15.814959Z", - "iopub.status.idle": "2024-09-17T23:25:27.930055Z", - "shell.execute_reply": "2024-09-17T23:25:27.929373Z" + "iopub.execute_input": "2024-09-23T15:52:12.721382Z", + "iopub.status.busy": "2024-09-23T15:52:12.720905Z", + "iopub.status.idle": "2024-09-23T15:52:23.305719Z", + "shell.execute_reply": "2024-09-23T15:52:23.304941Z" } }, "outputs": [ @@ -1377,133 +1286,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:16 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:52:13 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:16 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-09-23 15:52:13 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:16 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-09-23 15:52:13 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:16 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-09-23 15:52:13 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" + "torchx 2024-09-23 15:52:13 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-09-23 15:52:13 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-09-23 15:52:13 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-09-23 15:52:13 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 INFO ---> 9d9fa455e078\n" + "torchx 2024-09-23 15:52:13 INFO ---> 52f92f1ea889\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:17 INFO Step 3/4 : COPY . .\n" + "torchx 2024-09-23 15:52:13 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:21 INFO ---> 5e2bee314bac\n" + "torchx 2024-09-23 15:52:17 INFO ---> e41cf3989ab3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:21 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-09-23 15:52:17 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:21 INFO ---> Running in 171d9d56d726\n" + "torchx 2024-09-23 15:52:17 INFO ---> Running in a02128ae8d20\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:25 INFO ---> Removed intermediate container 171d9d56d726\n" + "torchx 2024-09-23 15:52:21 INFO ---> Removed intermediate container a02128ae8d20\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:25 INFO ---> a3f23864b182\n" + "torchx 2024-09-23 15:52:21 INFO ---> df927abef23e\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:25 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-09-23 15:52:21 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:25 INFO Successfully built a3f23864b182\n" + "torchx 2024-09-23 15:52:21 INFO Successfully built df927abef23e\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:25 INFO Built new image `sha256:a3f23864b1828796d32cfd7bf208ed0f7fb0d40161c4f9d40353a9b3dc308eaa` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" + "torchx 2024-09-23 15:52:21 INFO Built new image `sha256:df927abef23e3c2c2e3fe0f1968dac429f9c27bda68c1c5cc0db55e084070de4` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:26 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:52:22 INFO Waiting for the app to finish...\n" ] }, { @@ -1517,14 +1426,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:27 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:52:23 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/hello_world-ht4qx5sv7hmrbc\n" + "local_docker://torchx/hello_world-nggtfwhqbh90pd\n" ] } ], @@ -1535,7 +1444,7 @@ }, { "cell_type": "markdown", - "id": "1bdb97b7", + "id": "1e79d262", "metadata": {}, "source": [ "If you have a Kubernetes cluster you can use the [Kubernetes scheduler](schedulers/kubernetes.rst) to launch\n", @@ -1544,7 +1453,7 @@ }, { "cell_type": "markdown", - "id": "ae2ca401", + "id": "f75c4c72", "metadata": { "lines_to_next_cell": 2, "region_name": "md" @@ -1558,7 +1467,7 @@ }, { "cell_type": "markdown", - "id": "dc36c0da", + "id": "1671759b", "metadata": {}, "source": [ "## Builtins\n", @@ -1570,13 +1479,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "a7ab140b", + "id": "18d85121", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:25:27.932731Z", - "iopub.status.busy": "2024-09-17T23:25:27.932322Z", - "iopub.status.idle": "2024-09-17T23:25:28.921912Z", - "shell.execute_reply": "2024-09-17T23:25:28.921266Z" + "iopub.execute_input": "2024-09-23T15:52:23.308352Z", + "iopub.status.busy": "2024-09-23T15:52:23.307929Z", + "iopub.status.idle": "2024-09-23T15:52:24.282605Z", + "shell.execute_reply": "2024-09-23T15:52:24.281909Z" } }, "outputs": [ @@ -1672,7 +1581,7 @@ }, { "cell_type": "markdown", - "id": "1ef83e5f", + "id": "59dd5b46", "metadata": {}, "source": [ "You can use these either from the CLI, from a pipeline or programmatically like\n", @@ -1682,13 +1591,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "a57c9751", + "id": "14757963", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:25:28.924337Z", - "iopub.status.busy": "2024-09-17T23:25:28.924003Z", - "iopub.status.idle": "2024-09-17T23:28:43.793063Z", - "shell.execute_reply": "2024-09-17T23:28:43.792379Z" + "iopub.execute_input": "2024-09-23T15:52:24.285356Z", + "iopub.status.busy": "2024-09-23T15:52:24.284827Z", + "iopub.status.idle": "2024-09-23T15:55:01.444775Z", + "shell.execute_reply": "2024-09-23T15:55:01.444042Z" } }, "outputs": [ @@ -1696,133 +1605,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:30 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:52:25 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:30 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-09-23 15:52:25 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:30 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-09-23 15:52:25 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:25:30 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-09-23 15:52:25 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:29 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-09-23 15:54:46 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:29 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-09-23 15:54:46 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:29 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-09-23 15:54:46 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:29 INFO ---> c31c993343e6\n" + "torchx 2024-09-23 15:54:46 INFO ---> 3ad6a395fc8b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:29 INFO Step 3/4 : COPY . .\n" + "torchx 2024-09-23 15:54:46 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:36 INFO ---> daf1a773479e\n" + "torchx 2024-09-23 15:54:53 INFO ---> 3cd7205d8e0d\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:36 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-09-23 15:54:53 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:36 INFO ---> Running in d48689ba95d6\n" + "torchx 2024-09-23 15:54:53 INFO ---> Running in e6f5c28b7488\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO ---> Removed intermediate container d48689ba95d6\n" + "torchx 2024-09-23 15:55:01 INFO ---> Removed intermediate container e6f5c28b7488\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO ---> 8067f3439367\n" + "torchx 2024-09-23 15:55:01 INFO ---> b98ce4756599\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-09-23 15:55:01 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO Successfully built 8067f3439367\n" + "torchx 2024-09-23 15:55:01 INFO Successfully built b98ce4756599\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO Built new image `sha256:8067f34393673970175578c1b5189f3ca20981976885cd29c5c486536d2d26ef` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" + "torchx 2024-09-23 15:55:01 INFO Built new image `sha256:b98ce4756599a7543c6edd010ceea97dad6110805187418250d643814c43de0a` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:55:01 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:43 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:55:01 INFO Job finished: SUCCEEDED\n" ] }, { @@ -1836,7 +1745,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/echo-m2w4xscjv1cqj\n" + "local_docker://torchx/echo-twjnz522z350w\n" ] } ], diff --git a/0.8.0dev0/pipelines/airflow.html b/0.8.0dev0/pipelines/airflow.html index 7b393d427..2033fee8b 100644 --- a/0.8.0dev0/pipelines/airflow.html +++ b/0.8.0dev0/pipelines/airflow.html @@ -529,7 +529,7 @@

Airflow
-
/tmp/ipykernel_4021/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
+
/tmp/ipykernel_3946/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
 
@@ -537,28 +537,28 @@

Airflow
-[2024-09-17T23:28:47.088+0000] {taskinstance.py:2612} INFO - Dependencies all met for dep_context=non-requeueable deps ti=<TaskInstance: example_python_operator-vdz046223nt34c.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
-[2024-09-17T23:28:47.093+0000] {taskinstance.py:2612} INFO - Dependencies all met for dep_context=requeueable deps ti=<TaskInstance: example_python_operator-vdz046223nt34c.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
-[2024-09-17T23:28:47.094+0000] {taskinstance.py:2865} INFO - Starting attempt 0 of 1
-[2024-09-17T23:28:47.095+0000] {taskinstance.py:2946} WARNING - cannot record queued_duration for task hello_torchx because previous state change time has not been saved
-[2024-09-17T23:28:47.104+0000] {taskinstance.py:2888} INFO - Executing <Task(_PythonDecoratedOperator): hello_torchx> on 2021-09-13 00:00:00+00:00
-[2024-09-17T23:28:47.641+0000] {taskinstance.py:3131} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-vdz046223nt34c' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'
+[2024-09-23T15:55:04.703+0000] {taskinstance.py:2612} INFO - Dependencies all met for dep_context=non-requeueable deps ti=<TaskInstance: example_python_operator-btw5k45qjjfcld.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
+[2024-09-23T15:55:04.709+0000] {taskinstance.py:2612} INFO - Dependencies all met for dep_context=requeueable deps ti=<TaskInstance: example_python_operator-btw5k45qjjfcld.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
+[2024-09-23T15:55:04.709+0000] {taskinstance.py:2865} INFO - Starting attempt 0 of 1
+[2024-09-23T15:55:04.710+0000] {taskinstance.py:2946} WARNING - cannot record queued_duration for task hello_torchx because previous state change time has not been saved
+[2024-09-23T15:55:04.720+0000] {taskinstance.py:2888} INFO - Executing <Task(_PythonDecoratedOperator): hello_torchx> on 2021-09-13 00:00:00+00:00
+[2024-09-23T15:55:05.249+0000] {taskinstance.py:3131} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-btw5k45qjjfcld' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'
 Task instance is in running state
  Previous state of the Task instance: queued
-Current task name:hello_torchx state:running start_date:2024-09-17 23:28:47.089672+00:00
-Dag name:example_python_operator-vdz046223nt34c and current dag run status:running
-[2024-09-17T23:28:47.644+0000] {taskinstance.py:731} INFO - ::endgroup::
-[2024-09-17T23:28:48.347+0000] {api.py:72} INFO - Tracker configurations: {}
-[2024-09-17T23:28:48.350+0000] {local_scheduler.py:771} INFO - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-[2024-09-17T23:28:48.351+0000] {local_scheduler.py:777} INFO - Log directory is: /tmp/torchx_tmc4gx24
+Current task name:hello_torchx state:running start_date:2024-09-23 15:55:04.704675+00:00
+Dag name:example_python_operator-btw5k45qjjfcld and current dag run status:running
+[2024-09-23T15:55:05.252+0000] {taskinstance.py:731} INFO - ::endgroup::
+[2024-09-23T15:55:05.955+0000] {api.py:72} INFO - Tracker configurations: {}
+[2024-09-23T15:55:05.958+0000] {local_scheduler.py:771} INFO - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+[2024-09-23T15:55:05.959+0000] {local_scheduler.py:777} INFO - Log directory is: /tmp/torchx_2_iu5bt7
 Hello, TorchX!
-[2024-09-17T23:28:48.456+0000] {python.py:240} INFO - Done. Returned value was: None
-[2024-09-17T23:28:48.461+0000] {taskinstance.py:340} INFO - ::group::Post task execution logs
-[2024-09-17T23:28:48.462+0000] {taskinstance.py:352} INFO - Marking task as SUCCESS. dag_id=example_python_operator-vdz046223nt34c, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240917T232847, end_date=20240917T232848
+[2024-09-23T15:55:06.064+0000] {python.py:240} INFO - Done. Returned value was: None
+[2024-09-23T15:55:06.069+0000] {taskinstance.py:340} INFO - ::group::Post task execution logs
+[2024-09-23T15:55:06.070+0000] {taskinstance.py:352} INFO - Marking task as SUCCESS. dag_id=example_python_operator-btw5k45qjjfcld, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240923T155504, end_date=20240923T155506
 Task instance in success state
  Previous state of the Task instance: running
-Dag name:example_python_operator-vdz046223nt34c queued_at:None
-Task hostname:runner.dkgqmwv5kzretlg0t13gj4hs5g.xx.internal.cloudapp.net operator:_PythonDecoratedOperator
+Dag name:example_python_operator-btw5k45qjjfcld queued_at:None
+Task hostname:runner.41vifxnojfsejhejp0axg4rq0g.bx.internal.cloudapp.net operator:_PythonDecoratedOperator
 

If all goes well you should see Hello, TorchX! printed above.

diff --git a/0.8.0dev0/pipelines/airflow.ipynb b/0.8.0dev0/pipelines/airflow.ipynb index 4fa193678..9b694820c 100644 --- a/0.8.0dev0/pipelines/airflow.ipynb +++ b/0.8.0dev0/pipelines/airflow.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "c1a38de2", + "id": "a211a0db", "metadata": {}, "source": [ "# Airflow\n", @@ -18,13 +18,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "62e2008d", + "id": "94105e88", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:46.092799Z", - "iopub.status.busy": "2024-09-17T23:28:46.092605Z", - "iopub.status.idle": "2024-09-17T23:28:46.852539Z", - "shell.execute_reply": "2024-09-17T23:28:46.851934Z" + "iopub.execute_input": "2024-09-23T15:55:03.717893Z", + "iopub.status.busy": "2024-09-23T15:55:03.717688Z", + "iopub.status.idle": "2024-09-23T15:55:04.468544Z", + "shell.execute_reply": "2024-09-23T15:55:04.467854Z" } }, "outputs": [], @@ -44,7 +44,7 @@ }, { "cell_type": "markdown", - "id": "540a07e8", + "id": "a9a90f92", "metadata": {}, "source": [ "To launch a TorchX job from Airflow you can create a Airflow Python task to\n", @@ -56,13 +56,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "0b636a58", + "id": "0c8a4078", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:46.854982Z", - "iopub.status.busy": "2024-09-17T23:28:46.854678Z", - "iopub.status.idle": "2024-09-17T23:28:46.859547Z", - "shell.execute_reply": "2024-09-17T23:28:46.858979Z" + "iopub.execute_input": "2024-09-23T15:55:04.471308Z", + "iopub.status.busy": "2024-09-23T15:55:04.470733Z", + "iopub.status.idle": "2024-09-23T15:55:04.475676Z", + "shell.execute_reply": "2024-09-23T15:55:04.475125Z" } }, "outputs": [], @@ -93,7 +93,7 @@ }, { "cell_type": "markdown", - "id": "84034af2", + "id": "fb48dc9f", "metadata": {}, "source": [ "Once we have the task defined we can put it into a Airflow DAG and run it like\n", @@ -103,24 +103,24 @@ { "cell_type": "code", "execution_count": 3, - "id": "29c55d7d", + "id": "be4e2190", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:46.861500Z", - "iopub.status.busy": "2024-09-17T23:28:46.861294Z", - "iopub.status.idle": "2024-09-17T23:28:48.471417Z", - "shell.execute_reply": "2024-09-17T23:28:48.470815Z" + "iopub.execute_input": "2024-09-23T15:55:04.477733Z", + "iopub.status.busy": "2024-09-23T15:55:04.477351Z", + "iopub.status.idle": "2024-09-23T15:55:06.079671Z", + "shell.execute_reply": "2024-09-23T15:55:06.079052Z" } }, "outputs": [ { "data": { "text/html": [ - "
/tmp/ipykernel_4021/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
+       "
/tmp/ipykernel_3946/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
        "
\n" ], "text/plain": [ - "\u001b[1;33m/tmp/ipykernel_4021/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" + "\u001b[1;33m/tmp/ipykernel_3946/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" ] }, "metadata": {}, @@ -130,42 +130,42 @@ "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.088+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2612} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:04.703+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2612} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.093+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2612} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:04.709+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2612} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.094+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2865} INFO\u001b[0m - Starting attempt 0 of 1\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:04.709+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2865} INFO\u001b[0m - Starting attempt 0 of 1\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.095+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2946} \u001b[33mWARNING\u001b[0m - \u001b[33mcannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:04.710+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2946} \u001b[33mWARNING\u001b[0m - \u001b[33mcannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.104+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2888} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:04.720+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2888} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:47.641+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m3131} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-vdz046223nt34c' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:05.249+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m3131} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-btw5k45qjjfcld' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" ] }, { @@ -174,30 +174,30 @@ "text": [ "Task instance is in running state\n", " Previous state of the Task instance: queued\n", - "Current task name:hello_torchx state:running start_date:2024-09-17 23:28:47.089672+00:00\n", - "Dag name:example_python_operator-vdz046223nt34c and current dag run status:running\n", - "[\u001b[34m2024-09-17T23:28:47.644+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m731} INFO\u001b[0m - ::endgroup::\u001b[0m\n" + "Current task name:hello_torchx state:running start_date:2024-09-23 15:55:04.704675+00:00\n", + "Dag name:example_python_operator-btw5k45qjjfcld and current dag run status:running\n", + "[\u001b[34m2024-09-23T15:55:05.252+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m731} INFO\u001b[0m - ::endgroup::\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:48.347+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:05.955+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:48.350+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:05.958+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:48.351+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_tmc4gx24\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:05.959+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_2_iu5bt7\u001b[0m\n" ] }, { @@ -205,21 +205,21 @@ "output_type": "stream", "text": [ "Hello, TorchX!\n", - "[\u001b[34m2024-09-17T23:28:48.456+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m240} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:06.064+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m240} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:48.461+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m340} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:06.069+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m340} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-09-17T23:28:48.462+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m352} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-vdz046223nt34c, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240917T232847, end_date=20240917T232848\u001b[0m\n" + "[\u001b[34m2024-09-23T15:55:06.070+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m352} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-btw5k45qjjfcld, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240923T155504, end_date=20240923T155506\u001b[0m\n" ] }, { @@ -228,8 +228,8 @@ "text": [ "Task instance in success state\n", " Previous state of the Task instance: running\n", - "Dag name:example_python_operator-vdz046223nt34c queued_at:None\n", - "Task hostname:runner.dkgqmwv5kzretlg0t13gj4hs5g.xx.internal.cloudapp.net operator:_PythonDecoratedOperator\n" + "Dag name:example_python_operator-btw5k45qjjfcld queued_at:None\n", + "Task hostname:runner.41vifxnojfsejhejp0axg4rq0g.bx.internal.cloudapp.net operator:_PythonDecoratedOperator\n" ] } ], @@ -261,7 +261,7 @@ }, { "cell_type": "markdown", - "id": "732011f3", + "id": "c7ec4d6d", "metadata": {}, "source": [ "If all goes well you should see `Hello, TorchX!` printed above.\n", diff --git a/0.8.0dev0/quickstart.html b/0.8.0dev0/quickstart.html index 503eb401e..f0eb1b93d 100644 --- a/0.8.0dev0/quickstart.html +++ b/0.8.0dev0/quickstart.html @@ -582,12 +582,12 @@

Launching
-torchx 2024-09-17 23:28:52 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:28:52 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-torchx 2024-09-17 23:28:52 INFO     Log directory is: /tmp/torchx_gfuu7tjy
-torchx 2024-09-17 23:28:52 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:55:10 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:55:10 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+torchx 2024-09-23 15:55:10 INFO     Log directory is: /tmp/torchx_5umfql3w
+torchx 2024-09-23 15:55:10 INFO     Waiting for the app to finish...
 python/0 Hello, your name!
-torchx 2024-09-17 23:28:53 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:55:11 INFO     Job finished: SUCCEEDED
 

We can run the exact same app via the local_docker scheduler. This scheduler will package up the local workspace as a layer on top of the specified image. This provides a very similar environment to the container based remote schedulers.

@@ -615,26 +615,26 @@

Launching
-torchx 2024-09-17 23:28:55 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:28:55 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-09-17 23:28:55 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-09-17 23:28:55 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-09-17 23:28:55 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-09-17 23:28:55 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-09-17 23:28:55 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-09-17 23:28:55 INFO      ---> c31c993343e6
-torchx 2024-09-17 23:28:55 INFO     Step 3/4 : COPY . .
-torchx 2024-09-17 23:29:02 INFO      ---> 34b2b4e966d3
-torchx 2024-09-17 23:29:02 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-09-17 23:29:02 INFO      ---> Running in 28f05a8742c5
-torchx 2024-09-17 23:29:09 INFO      ---> Removed intermediate container 28f05a8742c5
-torchx 2024-09-17 23:29:09 INFO      ---> 4af5064ebb2c
-torchx 2024-09-17 23:29:09 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-09-17 23:29:09 INFO     Successfully built 4af5064ebb2c
-torchx 2024-09-17 23:29:09 INFO     Built new image `sha256:4af5064ebb2c6fa4027d373be8540e2ac3f3da68b7bf5b0ae8bf0a234cdc6ad5` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
-torchx 2024-09-17 23:29:09 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:55:12 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:55:12 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-09-23 15:55:12 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-09-23 15:55:12 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-09-23 15:55:13 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-09-23 15:55:13 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-09-23 15:55:13 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-09-23 15:55:13 INFO      ---> 3ad6a395fc8b
+torchx 2024-09-23 15:55:13 INFO     Step 3/4 : COPY . .
+torchx 2024-09-23 15:55:20 INFO      ---> de401ca8ebeb
+torchx 2024-09-23 15:55:20 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-09-23 15:55:20 INFO      ---> Running in 411561f3d284
+torchx 2024-09-23 15:55:27 INFO      ---> Removed intermediate container 411561f3d284
+torchx 2024-09-23 15:55:27 INFO      ---> eea7936212d8
+torchx 2024-09-23 15:55:27 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-09-23 15:55:27 INFO     Successfully built eea7936212d8
+torchx 2024-09-23 15:55:27 INFO     Built new image `sha256:eea7936212d84823304b8ef0b67bc10a172507ffd482d3e2708570e6c4bad211` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
+torchx 2024-09-23 15:55:27 INFO     Waiting for the app to finish...
 python/0 Hello, your name!
-torchx 2024-09-17 23:29:10 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:55:28 INFO     Job finished: SUCCEEDED
 

TorchX defaults to using the ghcr.io/pytorch/torchx Docker container image which contains the PyTorch libraries, TorchX and related dependencies.

@@ -765,41 +765,41 @@

Distributed
-torchx 2024-09-17 23:29:13 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:29:13 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-09-17 23:29:13 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-09-17 23:29:13 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-09-17 23:29:13 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-09-17 23:29:13 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-09-17 23:29:13 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-09-17 23:29:13 INFO      ---> c31c993343e6
-torchx 2024-09-17 23:29:13 INFO     Step 3/4 : COPY . .
-torchx 2024-09-17 23:29:20 INFO      ---> fae2e9b14916
-torchx 2024-09-17 23:29:20 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-09-17 23:29:20 INFO      ---> Running in 4ce46cbb343a
-torchx 2024-09-17 23:29:27 INFO      ---> Removed intermediate container 4ce46cbb343a
-torchx 2024-09-17 23:29:27 INFO      ---> a20cc84a8517
-torchx 2024-09-17 23:29:27 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-09-17 23:29:27 INFO     Successfully built a20cc84a8517
-torchx 2024-09-17 23:29:27 INFO     Built new image `sha256:a20cc84a8517b5b8c0b7a95730dcef03f05bc22c653f06dc5b71c6d202a8c7a2` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.
-torchx 2024-09-17 23:29:28 INFO     Waiting for the app to finish...
-dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING]
-dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] *****************************************
-dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
-dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] *****************************************
-dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING]
-dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] *****************************************
-dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
-dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] *****************************************
+torchx 2024-09-23 15:55:31 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:55:31 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-09-23 15:55:31 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-09-23 15:55:31 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-09-23 15:55:31 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-09-23 15:55:31 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-09-23 15:55:31 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-09-23 15:55:31 INFO      ---> 3ad6a395fc8b
+torchx 2024-09-23 15:55:31 INFO     Step 3/4 : COPY . .
+torchx 2024-09-23 15:55:38 INFO      ---> 238275fff00a
+torchx 2024-09-23 15:55:38 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-09-23 15:55:38 INFO      ---> Running in e9fb20f84299
+torchx 2024-09-23 15:55:45 INFO      ---> Removed intermediate container e9fb20f84299
+torchx 2024-09-23 15:55:45 INFO      ---> 2909315be35d
+torchx 2024-09-23 15:55:45 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-09-23 15:55:45 INFO     Successfully built 2909315be35d
+torchx 2024-09-23 15:55:45 INFO     Built new image `sha256:2909315be35d852b07547b81ed3442beb080ea59a595b1285ba7aa0bcb5e83d9` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.
+torchx 2024-09-23 15:55:46 INFO     Waiting for the app to finish...
+dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING]
+dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] *****************************************
+dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] *****************************************
+dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING]
+dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] *****************************************
+dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] *****************************************
+dist_app/1 [1]:I am worker 3 of 4!
+dist_app/1 [1]:all_reduce output = tensor([6])
+dist_app/1 [0]:I am worker 2 of 4!
+dist_app/1 [0]:all_reduce output = tensor([6])
 dist_app/0 [0]:I am worker 0 of 4!
 dist_app/0 [0]:all_reduce output = tensor([6])
 dist_app/0 [1]:I am worker 1 of 4!
 dist_app/0 [1]:all_reduce output = tensor([6])
-dist_app/1 [0]:I am worker 2 of 4!
-dist_app/1 [0]:all_reduce output = tensor([6])
-dist_app/1 [1]:I am worker 3 of 4!
-dist_app/1 [1]:all_reduce output = tensor([6])
-torchx 2024-09-17 23:29:36 INFO     Job finished: SUCCEEDED
+torchx 2024-09-23 15:55:54 INFO     Job finished: SUCCEEDED
 
@@ -1189,57 +1189,57 @@

Docker-based Schedulers
-torchx 2024-09-17 23:29:38 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
-torchx 2024-09-17 23:29:39 INFO     Tracker configurations: {}
-torchx 2024-09-17 23:29:39 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-09-17 23:29:39 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-09-17 23:29:39 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-09-17 23:29:39 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-09-17 23:29:40 INFO     Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
-torchx 2024-09-17 23:30:46 INFO      ---> c3f17e5ac010
-torchx 2024-09-17 23:30:46 INFO     Step 2/4 : RUN pip install timm
-torchx 2024-09-17 23:30:46 INFO      ---> Running in 5dfa98496545
-torchx 2024-09-17 23:30:47 INFO     Collecting timm
-torchx 2024-09-17 23:30:47 INFO       Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
-torchx 2024-09-17 23:30:47 INFO     Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)
-torchx 2024-09-17 23:30:47 INFO     Collecting huggingface-hub
-torchx 2024-09-17 23:30:47 INFO       Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
-torchx 2024-09-17 23:30:47 INFO     Collecting safetensors
-torchx 2024-09-17 23:30:47 INFO       Downloading safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)
-torchx 2024-09-17 23:30:47 INFO     Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)
-torchx 2024-09-17 23:30:47 INFO     Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)
-torchx 2024-09-17 23:30:47 INFO     Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)
-torchx 2024-09-17 23:30:48 INFO     Collecting importlib-metadata
-torchx 2024-09-17 23:30:48 INFO       Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)
-torchx 2024-09-17 23:30:48 INFO     Collecting fsspec
-torchx 2024-09-17 23:30:48 INFO       Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)
-torchx 2024-09-17 23:30:48 INFO     Collecting packaging>=20.9
-torchx 2024-09-17 23:30:48 INFO       Downloading packaging-24.0-py3-none-any.whl (53 kB)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)
-torchx 2024-09-17 23:30:48 INFO     Collecting zipp>=0.5
-torchx 2024-09-17 23:30:48 INFO       Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)
-torchx 2024-09-17 23:30:48 INFO     Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)
-torchx 2024-09-17 23:30:48 INFO     Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm
-torchx 2024-09-17 23:30:49 INFO     Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.5 timm-0.9.12 zipp-3.15.0
-torchx 2024-09-17 23:30:52 INFO      ---> Removed intermediate container 5dfa98496545
-torchx 2024-09-17 23:30:52 INFO      ---> 031c7ece2d28
-torchx 2024-09-17 23:30:52 INFO     Step 3/4 : COPY . .
-torchx 2024-09-17 23:30:54 INFO      ---> 2fec9d7b0b4b
-torchx 2024-09-17 23:30:54 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-09-17 23:30:54 INFO      ---> Running in c249b92c283b
-torchx 2024-09-17 23:30:56 INFO      ---> Removed intermediate container c249b92c283b
-torchx 2024-09-17 23:30:56 INFO      ---> 8c8d1a4d8eee
-torchx 2024-09-17 23:30:56 INFO     [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed
-torchx 2024-09-17 23:30:56 INFO     Successfully built 8c8d1a4d8eee
-torchx 2024-09-17 23:30:56 INFO     Built new image `sha256:8c8d1a4d8eeefe6cc84db12525474ba429267311a91ecf8590996621004f82f6` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
-torchx 2024-09-17 23:30:56 INFO     Waiting for the app to finish...
+torchx 2024-09-23 15:55:56 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
+torchx 2024-09-23 15:55:57 INFO     Tracker configurations: {}
+torchx 2024-09-23 15:55:57 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-09-23 15:55:57 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-09-23 15:55:57 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-09-23 15:55:57 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-09-23 15:55:57 INFO     Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
+torchx 2024-09-23 15:57:23 INFO      ---> c3f17e5ac010
+torchx 2024-09-23 15:57:23 INFO     Step 2/4 : RUN pip install timm
+torchx 2024-09-23 15:57:23 INFO      ---> Running in 2fbb8f7df77c
+torchx 2024-09-23 15:57:24 INFO     Collecting timm
+torchx 2024-09-23 15:57:24 INFO       Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
+torchx 2024-09-23 15:57:24 INFO     Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)
+torchx 2024-09-23 15:57:24 INFO     Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)
+torchx 2024-09-23 15:57:24 INFO     Collecting safetensors
+torchx 2024-09-23 15:57:24 INFO       Downloading safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)
+torchx 2024-09-23 15:57:24 INFO     Collecting huggingface-hub
+torchx 2024-09-23 15:57:24 INFO       Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
+torchx 2024-09-23 15:57:24 INFO     Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)
+torchx 2024-09-23 15:57:24 INFO     Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)
+torchx 2024-09-23 15:57:25 INFO     Collecting packaging>=20.9
+torchx 2024-09-23 15:57:25 INFO       Downloading packaging-24.0-py3-none-any.whl (53 kB)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)
+torchx 2024-09-23 15:57:25 INFO     Collecting fsspec
+torchx 2024-09-23 15:57:25 INFO       Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)
+torchx 2024-09-23 15:57:25 INFO     Collecting importlib-metadata
+torchx 2024-09-23 15:57:25 INFO       Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)
+torchx 2024-09-23 15:57:25 INFO     Collecting zipp>=0.5
+torchx 2024-09-23 15:57:25 INFO       Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)
+torchx 2024-09-23 15:57:25 INFO     Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)
+torchx 2024-09-23 15:57:25 INFO     Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm
+torchx 2024-09-23 15:57:26 INFO     Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.5 timm-0.9.12 zipp-3.15.0
+torchx 2024-09-23 15:57:29 INFO      ---> Removed intermediate container 2fbb8f7df77c
+torchx 2024-09-23 15:57:29 INFO      ---> cd59277de919
+torchx 2024-09-23 15:57:29 INFO     Step 3/4 : COPY . .
+torchx 2024-09-23 15:57:31 INFO      ---> 1b7cd0dc068f
+torchx 2024-09-23 15:57:31 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-09-23 15:57:31 INFO      ---> Running in 27d618b9e7a2
+torchx 2024-09-23 15:57:33 INFO      ---> Removed intermediate container 27d618b9e7a2
+torchx 2024-09-23 15:57:33 INFO      ---> 50dd9580bcfb
+torchx 2024-09-23 15:57:33 INFO     [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed
+torchx 2024-09-23 15:57:34 INFO     Successfully built 50dd9580bcfb
+torchx 2024-09-23 15:57:34 INFO     Built new image `sha256:50dd9580bcfb2fe652d2897cbb2b8f4b954454288d3bc644b45146bb7532773d` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
+torchx 2024-09-23 15:57:34 INFO     Waiting for the app to finish...
 python/0 ResNet(
 python/0   (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 python/0   (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -1348,7 +1348,7 @@ 

Docker-based Schedulers

diff --git a/0.8.0dev0/quickstart.ipynb b/0.8.0dev0/quickstart.ipynb index 3885219d5..e6400830b 100644 --- a/0.8.0dev0/quickstart.ipynb +++ b/0.8.0dev0/quickstart.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "a869ff5a", + "id": "d975deed", "metadata": {}, "source": [ "# Quickstart\n", @@ -18,7 +18,7 @@ }, { "cell_type": "markdown", - "id": "d0c47e41", + "id": "ada55c13", "metadata": { "region_name": "md" }, @@ -31,7 +31,7 @@ }, { "cell_type": "markdown", - "id": "8ca24a05", + "id": "45224141", "metadata": {}, "source": [ "See the [README](https://github.com/pytorch/torchx) for more\n", @@ -41,13 +41,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "7b4b7dca", + "id": "cb3e561c", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:50.606635Z", - "iopub.status.busy": "2024-09-17T23:28:50.606085Z", - "iopub.status.idle": "2024-09-17T23:28:50.925877Z", - "shell.execute_reply": "2024-09-17T23:28:50.925244Z" + "iopub.execute_input": "2024-09-23T15:55:08.376388Z", + "iopub.status.busy": "2024-09-23T15:55:08.376181Z", + "iopub.status.idle": "2024-09-23T15:55:08.689045Z", + "shell.execute_reply": "2024-09-23T15:55:08.688406Z" } }, "outputs": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "233d8567", + "id": "11aa02c8", "metadata": {}, "source": [ "## Hello World\n", @@ -189,13 +189,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "d0f2ff9f", + "id": "87ab0c11", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:50.928417Z", - "iopub.status.busy": "2024-09-17T23:28:50.928015Z", - "iopub.status.idle": "2024-09-17T23:28:50.932638Z", - "shell.execute_reply": "2024-09-17T23:28:50.932094Z" + "iopub.execute_input": "2024-09-23T15:55:08.691456Z", + "iopub.status.busy": "2024-09-23T15:55:08.690937Z", + "iopub.status.idle": "2024-09-23T15:55:08.695490Z", + "shell.execute_reply": "2024-09-23T15:55:08.694956Z" } }, "outputs": [ @@ -217,7 +217,7 @@ }, { "cell_type": "markdown", - "id": "c45e806f", + "id": "8497772d", "metadata": {}, "source": [ "## Launching\n", @@ -231,13 +231,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "438d624e", + "id": "7842f534", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:50.934650Z", - "iopub.status.busy": "2024-09-17T23:28:50.934288Z", - "iopub.status.idle": "2024-09-17T23:28:51.983787Z", - "shell.execute_reply": "2024-09-17T23:28:51.983143Z" + "iopub.execute_input": "2024-09-23T15:55:08.697751Z", + "iopub.status.busy": "2024-09-23T15:55:08.697255Z", + "iopub.status.idle": "2024-09-23T15:55:09.799304Z", + "shell.execute_reply": "2024-09-23T15:55:09.798529Z" } }, "outputs": [ @@ -494,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "d261760c", + "id": "47794272", "metadata": {}, "source": [ "The component takes in the script name and any extra arguments will be passed to\n", @@ -504,13 +504,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "6b888198", + "id": "346a6a49", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:51.986026Z", - "iopub.status.busy": "2024-09-17T23:28:51.985686Z", - "iopub.status.idle": "2024-09-17T23:28:54.021768Z", - "shell.execute_reply": "2024-09-17T23:28:54.021132Z" + "iopub.execute_input": "2024-09-23T15:55:09.801522Z", + "iopub.status.busy": "2024-09-23T15:55:09.801246Z", + "iopub.status.idle": "2024-09-23T15:55:11.804904Z", + "shell.execute_reply": "2024-09-23T15:55:11.804096Z" } }, "outputs": [ @@ -518,28 +518,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:52 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:55:10 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:52 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-09-23 15:55:10 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:52 INFO Log directory is: /tmp/torchx_gfuu7tjy\n" + "torchx 2024-09-23 15:55:10 INFO Log directory is: /tmp/torchx_5umfql3w\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:52 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:55:10 INFO Waiting for the app to finish...\n" ] }, { @@ -553,14 +553,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:53 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:55:11 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/torchx_utils_python-mhnbpcwr59rn2\n" + "local_cwd://torchx/torchx_utils_python-x79jqw2xl224bc\n" ] } ], @@ -571,7 +571,7 @@ }, { "cell_type": "markdown", - "id": "8bcc50c5", + "id": "39b9ea60", "metadata": {}, "source": [ "We can run the exact same app via the `local_docker` scheduler. This scheduler\n", @@ -590,13 +590,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "42c9ac3f", + "id": "3bfd533c", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:28:54.023949Z", - "iopub.status.busy": "2024-09-17T23:28:54.023667Z", - "iopub.status.idle": "2024-09-17T23:29:10.875437Z", - "shell.execute_reply": "2024-09-17T23:29:10.874736Z" + "iopub.execute_input": "2024-09-23T15:55:11.807624Z", + "iopub.status.busy": "2024-09-23T15:55:11.807097Z", + "iopub.status.idle": "2024-09-23T15:55:28.827081Z", + "shell.execute_reply": "2024-09-23T15:55:28.826417Z" } }, "outputs": [ @@ -604,126 +604,126 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:55:12 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-09-23 15:55:12 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-09-23 15:55:12 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-09-23 15:55:12 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-09-23 15:55:13 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-09-23 15:55:13 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-09-23 15:55:13 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO ---> c31c993343e6\n" + "torchx 2024-09-23 15:55:13 INFO ---> 3ad6a395fc8b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:28:55 INFO Step 3/4 : COPY . .\n" + "torchx 2024-09-23 15:55:13 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:02 INFO ---> 34b2b4e966d3\n" + "torchx 2024-09-23 15:55:20 INFO ---> de401ca8ebeb\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:02 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-09-23 15:55:20 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:02 INFO ---> Running in 28f05a8742c5\n" + "torchx 2024-09-23 15:55:20 INFO ---> Running in 411561f3d284\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO ---> Removed intermediate container 28f05a8742c5\n" + "torchx 2024-09-23 15:55:27 INFO ---> Removed intermediate container 411561f3d284\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO ---> 4af5064ebb2c\n" + "torchx 2024-09-23 15:55:27 INFO ---> eea7936212d8\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-09-23 15:55:27 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO Successfully built 4af5064ebb2c\n" + "torchx 2024-09-23 15:55:27 INFO Successfully built eea7936212d8\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO Built new image `sha256:4af5064ebb2c6fa4027d373be8540e2ac3f3da68b7bf5b0ae8bf0a234cdc6ad5` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-09-23 15:55:27 INFO Built new image `sha256:eea7936212d84823304b8ef0b67bc10a172507ffd482d3e2708570e6c4bad211` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:09 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:55:27 INFO Waiting for the app to finish...\n" ] }, { @@ -737,14 +737,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:10 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:55:28 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-hfstzzzfnd2fm\n" + "local_docker://torchx/torchx_utils_python-kp7nk9sndlv1sd\n" ] } ], @@ -755,7 +755,7 @@ }, { "cell_type": "markdown", - "id": "bea51572", + "id": "21478a40", "metadata": {}, "source": [ "TorchX defaults to using the\n", @@ -773,13 +773,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "3ae95ede", + "id": "ea74070c", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:10.878022Z", - "iopub.status.busy": "2024-09-17T23:29:10.877690Z", - "iopub.status.idle": "2024-09-17T23:29:12.017795Z", - "shell.execute_reply": "2024-09-17T23:29:12.017161Z" + "iopub.execute_input": "2024-09-23T15:55:28.829598Z", + "iopub.status.busy": "2024-09-23T15:55:28.829093Z", + "iopub.status.idle": "2024-09-23T15:55:29.979133Z", + "shell.execute_reply": "2024-09-23T15:55:29.978493Z" } }, "outputs": [ @@ -1190,7 +1190,7 @@ }, { "cell_type": "markdown", - "id": "cb620e87", + "id": "f1f6b06a", "metadata": {}, "source": [ "Lets create a slightly more interesting app to leverage the TorchX distributed\n", @@ -1200,13 +1200,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "a367c1f0", + "id": "f9389c38", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:12.020077Z", - "iopub.status.busy": "2024-09-17T23:29:12.019805Z", - "iopub.status.idle": "2024-09-17T23:29:12.023960Z", - "shell.execute_reply": "2024-09-17T23:29:12.023410Z" + "iopub.execute_input": "2024-09-23T15:55:29.981408Z", + "iopub.status.busy": "2024-09-23T15:55:29.981182Z", + "iopub.status.idle": "2024-09-23T15:55:29.985737Z", + "shell.execute_reply": "2024-09-23T15:55:29.985191Z" } }, "outputs": [ @@ -1234,7 +1234,7 @@ }, { "cell_type": "markdown", - "id": "15ce7fd3", + "id": "21e7c1b9", "metadata": {}, "source": [ "Let launch a small job with 2 nodes and 2 worker processes per node:" @@ -1243,13 +1243,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "66b54098", + "id": "13a6d60f", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:12.025937Z", - "iopub.status.busy": "2024-09-17T23:29:12.025600Z", - "iopub.status.idle": "2024-09-17T23:29:36.378634Z", - "shell.execute_reply": "2024-09-17T23:29:36.377986Z" + "iopub.execute_input": "2024-09-23T15:55:29.987812Z", + "iopub.status.busy": "2024-09-23T15:55:29.987356Z", + "iopub.status.idle": "2024-09-23T15:55:54.243590Z", + "shell.execute_reply": "2024-09-23T15:55:54.242794Z" } }, "outputs": [ @@ -1257,252 +1257,252 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:55:31 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-09-23 15:55:31 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-09-23 15:55:31 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-09-23 15:55:31 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-09-23 15:55:31 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-09-23 15:55:31 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-09-23 15:55:31 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO ---> c31c993343e6\n" + "torchx 2024-09-23 15:55:31 INFO ---> 3ad6a395fc8b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:13 INFO Step 3/4 : COPY . .\n" + "torchx 2024-09-23 15:55:31 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:20 INFO ---> fae2e9b14916\n" + "torchx 2024-09-23 15:55:38 INFO ---> 238275fff00a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:20 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-09-23 15:55:38 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:20 INFO ---> Running in 4ce46cbb343a\n" + "torchx 2024-09-23 15:55:38 INFO ---> Running in e9fb20f84299\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:27 INFO ---> Removed intermediate container 4ce46cbb343a\n" + "torchx 2024-09-23 15:55:45 INFO ---> Removed intermediate container e9fb20f84299\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:27 INFO ---> a20cc84a8517\n" + "torchx 2024-09-23 15:55:45 INFO ---> 2909315be35d\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:27 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-09-23 15:55:45 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:27 INFO Successfully built a20cc84a8517\n" + "torchx 2024-09-23 15:55:45 INFO Successfully built 2909315be35d\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:27 INFO Built new image `sha256:a20cc84a8517b5b8c0b7a95730dcef03f05bc22c653f06dc5b71c6d202a8c7a2` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" + "torchx 2024-09-23 15:55:45 INFO Built new image `sha256:2909315be35d852b07547b81ed3442beb080ea59a595b1285ba7aa0bcb5e83d9` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:28 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:55:46 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] \n" + "dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-09-17 23:29:29,347] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-09-23 15:55:47,189] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] \n" + "dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-09-17 23:29:29,358] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-09-23 15:55:47,193] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:I am worker 0 of 4!\n" + "dist_app/1 [1]:I am worker 3 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:all_reduce output = tensor([6])\n" + "dist_app/1 [1]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:I am worker 1 of 4!\n" + "dist_app/1 [0]:I am worker 2 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:all_reduce output = tensor([6])\n" + "dist_app/1 [0]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [0]:I am worker 2 of 4!\n" + "dist_app/0 [0]:I am worker 0 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [0]:all_reduce output = tensor([6])\n" + "dist_app/0 [0]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [1]:I am worker 3 of 4!\n" + "dist_app/0 [1]:I am worker 1 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [1]:all_reduce output = tensor([6])\n" + "dist_app/0 [1]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:36 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:55:54 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/dist_app-r7s41p4ksn6nrd\n" + "local_docker://torchx/dist_app-lnb5t0lj51c3zc\n" ] } ], @@ -1513,7 +1513,7 @@ }, { "cell_type": "markdown", - "id": "47e27ec4", + "id": "ad27d074", "metadata": {}, "source": [ "## Workspaces / Patching\n", @@ -1542,13 +1542,13 @@ { "cell_type": "code", "execution_count": 9, - "id": "42b8301d", + "id": "d5275fa2", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:36.380937Z", - "iopub.status.busy": "2024-09-17T23:29:36.380672Z", - "iopub.status.idle": "2024-09-17T23:29:36.384694Z", - "shell.execute_reply": "2024-09-17T23:29:36.384174Z" + "iopub.execute_input": "2024-09-23T15:55:54.246509Z", + "iopub.status.busy": "2024-09-23T15:55:54.246085Z", + "iopub.status.idle": "2024-09-23T15:55:54.251062Z", + "shell.execute_reply": "2024-09-23T15:55:54.250505Z" } }, "outputs": [ @@ -1573,7 +1573,7 @@ }, { "cell_type": "markdown", - "id": "d6b078ee", + "id": "1e09b494", "metadata": {}, "source": [ "## Remote Schedulers\n", @@ -1588,7 +1588,7 @@ }, { "cell_type": "markdown", - "id": "68b2a4f0", + "id": "4c38ea94", "metadata": { "region_name": "md" }, @@ -1603,7 +1603,7 @@ }, { "cell_type": "markdown", - "id": "f9f49b39", + "id": "0e90ae68", "metadata": {}, "source": [ "Depending on the scheduler there may be a few extra configuration parameters so\n", @@ -1613,7 +1613,7 @@ }, { "cell_type": "markdown", - "id": "28e68c92", + "id": "8339a6e9", "metadata": {}, "source": [ "All config options:" @@ -1622,13 +1622,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "03a75195", + "id": "6fedb26c", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:36.386963Z", - "iopub.status.busy": "2024-09-17T23:29:36.386484Z", - "iopub.status.idle": "2024-09-17T23:29:38.194703Z", - "shell.execute_reply": "2024-09-17T23:29:38.193868Z" + "iopub.execute_input": "2024-09-23T15:55:54.253184Z", + "iopub.status.busy": "2024-09-23T15:55:54.252871Z", + "iopub.status.idle": "2024-09-23T15:55:56.080475Z", + "shell.execute_reply": "2024-09-23T15:55:56.079583Z" }, "lines_to_next_cell": 2 }, @@ -3412,7 +3412,7 @@ }, { "cell_type": "markdown", - "id": "4af86db0", + "id": "acd1cda3", "metadata": {}, "source": [ "## Custom Images\n", @@ -3427,13 +3427,13 @@ { "cell_type": "code", "execution_count": 11, - "id": "b09bf4d2", + "id": "c0aa6100", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:38.197701Z", - "iopub.status.busy": "2024-09-17T23:29:38.197124Z", - "iopub.status.idle": "2024-09-17T23:29:38.202360Z", - "shell.execute_reply": "2024-09-17T23:29:38.201771Z" + "iopub.execute_input": "2024-09-23T15:55:56.083472Z", + "iopub.status.busy": "2024-09-23T15:55:56.083077Z", + "iopub.status.idle": "2024-09-23T15:55:56.087427Z", + "shell.execute_reply": "2024-09-23T15:55:56.086809Z" } }, "outputs": [ @@ -3456,13 +3456,13 @@ { "cell_type": "code", "execution_count": 12, - "id": "2674378f", + "id": "994f3332", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:38.204513Z", - "iopub.status.busy": "2024-09-17T23:29:38.204078Z", - "iopub.status.idle": "2024-09-17T23:29:38.208210Z", - "shell.execute_reply": "2024-09-17T23:29:38.207674Z" + "iopub.execute_input": "2024-09-23T15:55:56.089609Z", + "iopub.status.busy": "2024-09-23T15:55:56.089239Z", + "iopub.status.idle": "2024-09-23T15:55:56.093088Z", + "shell.execute_reply": "2024-09-23T15:55:56.092539Z" } }, "outputs": [ @@ -3486,7 +3486,7 @@ }, { "cell_type": "markdown", - "id": "44f3c25c", + "id": "cdd3384a", "metadata": {}, "source": [ "Once we have the Dockerfile created we can launch as normal and TorchX will\n", @@ -3497,13 +3497,13 @@ { "cell_type": "code", "execution_count": 13, - "id": "fc2d0695", + "id": "dd538cf2", "metadata": { "execution": { - "iopub.execute_input": "2024-09-17T23:29:38.210078Z", - "iopub.status.busy": "2024-09-17T23:29:38.209789Z", - "iopub.status.idle": "2024-09-17T23:30:59.022497Z", - "shell.execute_reply": "2024-09-17T23:30:59.021732Z" + "iopub.execute_input": "2024-09-23T15:55:56.095465Z", + "iopub.status.busy": "2024-09-23T15:55:56.094823Z", + "iopub.status.idle": "2024-09-23T15:57:36.292816Z", + "shell.execute_reply": "2024-09-23T15:57:36.292088Z" } }, "outputs": [ @@ -3511,357 +3511,357 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:38 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" + "torchx 2024-09-23 15:55:56 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:39 INFO Tracker configurations: {}\n" + "torchx 2024-09-23 15:55:57 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:39 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-09-23 15:55:57 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:39 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-09-23 15:55:57 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:39 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-09-23 15:55:57 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:39 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-09-23 15:55:57 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:29:40 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" + "torchx 2024-09-23 15:55:57 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:46 INFO ---> c3f17e5ac010\n" + "torchx 2024-09-23 15:57:23 INFO ---> c3f17e5ac010\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:46 INFO Step 2/4 : RUN pip install timm\n" + "torchx 2024-09-23 15:57:23 INFO Step 2/4 : RUN pip install timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:46 INFO ---> Running in 5dfa98496545\n" + "torchx 2024-09-23 15:57:23 INFO ---> Running in 2fbb8f7df77c\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Collecting timm\n" + "torchx 2024-09-23 15:57:24 INFO Collecting timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" + "torchx 2024-09-23 15:57:24 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" + "torchx 2024-09-23 15:57:24 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Collecting huggingface-hub\n" + "torchx 2024-09-23 15:57:24 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" + "torchx 2024-09-23 15:57:24 INFO Collecting safetensors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Collecting safetensors\n" + "torchx 2024-09-23 15:57:24 INFO Downloading safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Downloading safetensors-0.4.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (436 kB)\n" + "torchx 2024-09-23 15:57:24 INFO Collecting huggingface-hub\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" + "torchx 2024-09-23 15:57:24 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" + "torchx 2024-09-23 15:57:24 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:47 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" + "torchx 2024-09-23 15:57:24 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" + "torchx 2024-09-23 15:57:25 INFO Collecting packaging>=20.9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Collecting importlib-metadata\n" + "torchx 2024-09-23 15:57:25 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Collecting fsspec\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Collecting packaging>=20.9\n" + "torchx 2024-09-23 15:57:25 INFO Collecting fsspec\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" + "torchx 2024-09-23 15:57:25 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" + "torchx 2024-09-23 15:57:25 INFO Collecting importlib-metadata\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" + "torchx 2024-09-23 15:57:25 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Collecting zipp>=0.5\n" + "torchx 2024-09-23 15:57:25 INFO Collecting zipp>=0.5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" + "torchx 2024-09-23 15:57:25 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" + "torchx 2024-09-23 15:57:25 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:48 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" + "torchx 2024-09-23 15:57:25 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:49 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.5 timm-0.9.12 zipp-3.15.0\n" + "torchx 2024-09-23 15:57:26 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.5 timm-0.9.12 zipp-3.15.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:52 INFO ---> Removed intermediate container 5dfa98496545\n" + "torchx 2024-09-23 15:57:29 INFO ---> Removed intermediate container 2fbb8f7df77c\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:52 INFO ---> 031c7ece2d28\n" + "torchx 2024-09-23 15:57:29 INFO ---> cd59277de919\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:52 INFO Step 3/4 : COPY . .\n" + "torchx 2024-09-23 15:57:29 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:54 INFO ---> 2fec9d7b0b4b\n" + "torchx 2024-09-23 15:57:31 INFO ---> 1b7cd0dc068f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:54 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-09-23 15:57:31 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:54 INFO ---> Running in c249b92c283b\n" + "torchx 2024-09-23 15:57:31 INFO ---> Running in 27d618b9e7a2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO ---> Removed intermediate container c249b92c283b\n" + "torchx 2024-09-23 15:57:33 INFO ---> Removed intermediate container 27d618b9e7a2\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO ---> 8c8d1a4d8eee\n" + "torchx 2024-09-23 15:57:33 INFO ---> 50dd9580bcfb\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" + "torchx 2024-09-23 15:57:33 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO Successfully built 8c8d1a4d8eee\n" + "torchx 2024-09-23 15:57:34 INFO Successfully built 50dd9580bcfb\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO Built new image `sha256:8c8d1a4d8eeefe6cc84db12525474ba429267311a91ecf8590996621004f82f6` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-09-23 15:57:34 INFO Built new image `sha256:50dd9580bcfb2fe652d2897cbb2b8f4b954454288d3bc644b45146bb7532773d` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:56 INFO Waiting for the app to finish...\n" + "torchx 2024-09-23 15:57:34 INFO Waiting for the app to finish...\n" ] }, { @@ -4624,14 +4624,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-09-17 23:30:58 INFO Job finished: SUCCEEDED\n" + "torchx 2024-09-23 15:57:36 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-glmbbmq1b5vwt\n" + "local_docker://torchx/torchx_utils_python-dgl1xfq9j5jbw\n" ] } ], @@ -4642,7 +4642,7 @@ }, { "cell_type": "markdown", - "id": "2521d1d7", + "id": "001f9196", "metadata": {}, "source": [ "### Slurm\n", @@ -4653,7 +4653,7 @@ }, { "cell_type": "markdown", - "id": "d69adb43", + "id": "2462e83e", "metadata": {}, "source": [ "## Next Steps\n", diff --git a/0.8.0dev0/searchindex.js b/0.8.0dev0/searchindex.js index c7c2efc94..6698bea2f 100644 --- a/0.8.0dev0/searchindex.js +++ b/0.8.0dev0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 7, 9, 11, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 12, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 12, 20], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28, 45], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": 3, "15": [3, 12, 29], "08": 3, "57": 3, "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43, 45], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 28, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 27, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 27, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "7": [11, 12, 29], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "09": [12, 27, 29], "17": [12, 27, 29], "23": [12, 27, 29], "18": [12, 39, 40], "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_pqo87ixa": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "19": 12, "tv443npdvbthz": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": [12, 27], "99b": 12, "0s": 12, "7s": 12, "dockerignor": [12, 47], "2b": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "0b": 12, "189b": 12, "1s": 12, "25kb": 12, "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "34mb": 12, "26": [12, 29], "70mb": 12, "3s": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "857b": 12, "2s": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "00gb": 12, "20": [12, 29], "97mb": 12, "5s": 12, "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "94mb": 12, "6s": 12, "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "21": [12, 29, 39], "46mb": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "extract": [12, 16, 29, 36], "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "9s": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "15mb": 12, "49mb": 12, "83mb": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "4s": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "29mb": 12, "89": 12, "13mb": 12, "110": 12, "10mb": 12, "45": 12, "09mb": 12, "63": 12, "96mb": 12, "175": 12, "11mb": 12, "83": 12, "89mb": 12, "109": 12, "05mb": 12, "8s": 12, "126": 12, "88mb": 12, "211": 12, "81mb": 12, "268": [12, 29], "44mb": 12, "144": 12, "163": 12, "58mb": 12, "354": 12, "42mb": 12, "187": 12, "205": 12, "52mb": 12, "227": 12, "54mb": 12, "6": [12, 29, 39], "324": 12, "01mb": 12, "246": 12, "447": 12, "74mb": 12, "265": 12, "284": 12, "16mb": 12, "303": 12, "04mb": 12, "533": 12, "73mb": 12, "424": 12, "67mb": 12, "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "627": 12, "528": 12, "48mb": 12, "714": 12, "08mb": 12, "804": 12, "26mb": 12, "631": 12, "24mb": 12, "12": [12, 29, 41], "899": 12, "68mb": 12, "13": [12, 16, 27, 29, 40], "736": 12, "14": [12, 18], "10gb": 12, "836": 12, "76mb": 12, "19gb": 12, "27gb": 12, "938": 12, "37gb": 12, "04gb": 12, "46gb": 12, "55gb": 12, "14gb": 12, "63gb": 12, "22": [12, 29, 39, 40], "25gb": 12, "36gb": 12, "24": [12, 17, 29, 36, 40, 45], "56gb": 12, "28": [12, 27, 29], "67gb": 12, "30": [12, 29, 36, 43], "77gb": 12, "87gb": 12, "33": 12, "98gb": 12, "36": [12, 29], "38": [12, 29], "25": [12, 29], "107": 12, "9d9fa455e0781fdf3c31a167be389dcca28ff3963c4bc64aa9e0dd5f9b73326c": 12, "disabl": [12, 29, 31, 36, 44], "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "9d9fa455e078": 12, "5e2bee314bac": 12, "label": [12, 17, 29, 36], "171d9d56d726": 12, "remov": [12, 27, 29, 30, 39], "a3f23864b182": 12, "successfulli": [12, 29, 30, 45], "a3f23864b1828796d32cfd7bf208ed0f7fb0d40161c4f9d40353a9b3dc308eaa": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "27": [12, 29], "ht4qx5sv7hmrbc": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "29": [12, 29], "c31c993343e6": [12, 29], "daf1a773479": 12, "d48689ba95d6": 12, "43": 12, "8067f3439367": 12, "8067f34393673970175578c1b5189f3ca20981976885cd29c5c486536d2d26ef": 12, "m2w4xscjv1cqj": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": 17, "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": [18, 27], "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_4021": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "17t23": 27, "47": [27, 29], "088": 27, "0000": 27, "taskinst": 27, "2612": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "vdz046223nt34c": 27, "manual__2021": 27, "13t00": 27, "00": 27, "093": 27, "094": 27, "2865": 27, "095": 27, "2946": 27, "queued_dur": 27, "104": 27, "2888": 27, "_pythondecoratedoper": 27, "641": 27, "3131": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_dag_run_id": 27, "queu": 27, "089672": 27, "644": 27, "731": 27, "endgroup": 27, "48": [27, 29], "347": [27, 29], "72": 27, "350": 27, "local_schedul": [27, 34, 41], "771": 27, "351": 27, "777": 27, "torchx_tmc4gx24": 27, "456": 27, "240": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "461": 27, "340": 27, "post": 27, "462": 27, "352": 27, "mark": 27, "run_id": [27, 46], "20210913t000000": 27, "20240917t232847": 27, "end_dat": 27, "20240917t232848": 27, "queued_at": 27, "hostnam": 27, "dkgqmwv5kzretlg0t13gj4hs5g": 27, "xx": 27, "cloudapp": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "legaci": 28, "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "52": 29, "torchx_gfuu7tji": 29, "53": 29, "mhnbpcwr59rn2": 29, "55": 29, "02": 29, "34b2b4e966d3": 29, "28f05a8742c5": 29, "4af5064ebb2c": 29, "4af5064ebb2c6fa4027d373be8540e2ac3f3da68b7bf5b0ae8bf0a234cdc6ad5": 29, "hfstzzzfnd2fm": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "fae2e9b14916": 29, "4ce46cbb343a": 29, "a20cc84a8517": 29, "a20cc84a8517b5b8c0b7a95730dcef03f05bc22c653f06dc5b71c6d202a8c7a2": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "358": 29, "r7s41p4ksn6nrd": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "privileg": [29, 35, 37, 39, 40], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "elev": [29, 35, 37], "permiss": [29, 35, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "40": 29, "46": 29, "c3f17e5ac010": 29, "5dfa98496545": 29, "py3": 29, "whl": 29, "satisfi": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "kb": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "436": 29, "pyyaml": 29, "typing_extens": 29, "importlib": 29, "importlib_metadata": 29, "2023": 29, "143": 29, "42": 29, "61": 29, "filelock": 29, "zipp": 29, "idna": 29, "urllib3": 29, "chardet": 29, "certifi": 29, "2017": 29, "pillow": 29, "49": 29, "031c7ece2d28": 29, "54": 29, "2fec9d7b0b4b": 29, "c249b92c283b": 29, "56": 29, "8c8d1a4d8ee": 29, "8c8d1a4d8eeefe6cc84db12525474ba429267311a91ecf8590996621004f82f6": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "05": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "58": 29, "glmbbmq1b5vwt": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40, 45], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": 43, "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_json_repr": 45, "json_repr": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_json_repr"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 7, 9, 11, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 12, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 20], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28, 45], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": [3, 27], "15": [3, 12, 27, 29], "08": 3, "57": [3, 29], "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 12, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43, 45], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 28, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 27, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 27, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "7": [11, 12, 29], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "09": [12, 27, 29], "23": [12, 27, 29], "50": [12, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "26": [12, 29], "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_3pxeqfza": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "27": [12, 29], "t612tjqcg72tm": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": [12, 27], "99b": 12, "0s": 12, "4s": 12, "dockerignor": [12, 47], "2b": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "189b": 12, "1s": 12, "25kb": 12, "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "0b": 12, "94mb": 12, "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "70mb": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "00gb": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "857b": 12, "12": [12, 29, 41], "58mb": 12, "2s": 12, "3s": 12, "20": [12, 29], "97mb": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "extract": [12, 16, 29, 36], "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "21": [12, 29, 39], "46mb": 12, "5s": 12, "6": [12, 29, 39], "91mb": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "6s": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "7s": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "29mb": 12, "8s": 12, "18": [12, 39, 40], "80mb": 12, "66mb": 12, "91": 12, "13mb": 12, "54": [12, 29], "53mb": 12, "75": 12, "50mb": 12, "97": 12, "52mb": 12, "188": 12, "74mb": 12, "9s": 12, "130": 12, "02mb": 12, "180": 12, "36mb": 12, "285": 12, "21mb": 12, "206": 12, "57mb": 12, "231": 12, "272": 12, "63mb": 12, "208": 12, "67mb": 12, "373": 12, "301": 12, "99mb": 12, "327": 12, "16mb": 12, "477": 12, "10mb": 12, "320": 12, "86mb": 12, "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "570": 12, "43mb": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "658": 12, "51mb": 12, "426": 12, "72mb": 12, "775": 12, "95mb": 12, "529": 12, "870": 12, "32mb": 12, "965": 12, "642": 12, "78mb": 12, "06gb": 12, "763": 12, "15gb": 12, "878": 12, "71mb": 12, "27gb": 12, "36gb": 12, "989": 12, "48gb": 12, "58gb": 12, "10gb": 12, "67gb": 12, "20gb": 12, "32gb": 12, "42gb": 12, "13": [12, 16, 27, 29, 40], "53gb": 12, "64gb": 12, "14": [12, 18], "75gb": 12, "85gb": 12, "97gb": 12, "17": [12, 29], "19": 12, "25": [12, 29], "30": [12, 29, 36, 43], "88": 12, "52f92f1ea8896a4af1a2d7d38d453472fadfd957266638a6a697d9be0c1eb796": 12, "52": 12, "disabl": [12, 29, 31, 36, 44], "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "45": [12, 29], "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "52f92f1ea889": 12, "e41cf3989ab3": 12, "label": [12, 17, 29, 36], "a02128ae8d20": 12, "remov": [12, 27, 29, 30, 39], "df927abef23": 12, "successfulli": [12, 29, 30, 45], "df927abef23e3c2c2e3fe0f1968dac429f9c27bda68c1c5cc0db55e084070de4": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "22": [12, 29, 39, 40], "nggtfwhqbh90pd": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "46": [12, 29], "3ad6a395fc8b": [12, 29], "53": [12, 29], "3cd7205d8e0d": 12, "e6f5c28b7488": 12, "55": [12, 27, 29], "01": 12, "b98ce4756599": 12, "b98ce4756599a7543c6edd010ceea97dad6110805187418250d643814c43de0a": 12, "twjnz522z350w": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "24": [17, 29, 36, 40, 45], "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": 17, "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": [18, 27], "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_3946": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "23t15": 27, "04": 27, "703": 27, "0000": 27, "taskinst": 27, "2612": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "btw5k45qjjfcld": 27, "manual__2021": 27, "13t00": 27, "00": 27, "709": 27, "2865": 27, "710": 27, "2946": 27, "queued_dur": 27, "720": 27, "2888": 27, "_pythondecoratedoper": 27, "05": [27, 29], "249": 27, "3131": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_dag_run_id": 27, "queu": 27, "704675": 27, "252": 27, "731": 27, "endgroup": 27, "955": 27, "72": 27, "958": 27, "local_schedul": [27, 34, 41], "771": 27, "959": 27, "777": 27, "torchx_2_iu5bt7": 27, "064": 27, "240": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "069": 27, "340": 27, "post": 27, "070": 27, "352": 27, "mark": 27, "run_id": [27, 46], "20210913t000000": 27, "20240923t155504": 27, "end_dat": 27, "20240923t155506": 27, "queued_at": 27, "hostnam": 27, "41vifxnojfsejhejp0axg4rq0g": 27, "bx": 27, "cloudapp": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "legaci": 28, "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "torchx_5umfql3w": 29, "x79jqw2xl224bc": 29, "de401ca8ebeb": 29, "411561f3d284": 29, "eea7936212d8": 29, "eea7936212d84823304b8ef0b67bc10a172507ffd482d3e2708570e6c4bad211": 29, "28": 29, "kp7nk9sndlv1sd": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "31": 29, "38": 29, "238275fff00a": 29, "e9fb20f84299": 29, "2909315be35d": 29, "2909315be35d852b07547b81ed3442beb080ea59a595b1285ba7aa0bcb5e83d9": 29, "47": 29, "189": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "193": 29, "lnb5t0lj51c3zc": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "privileg": [29, 35, 37, 39, 40], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "elev": [29, 35, 37], "permiss": [29, 35, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "56": 29, "c3f17e5ac010": 29, "2fbb8f7df77c": 29, "py3": 29, "whl": 29, "satisfi": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "pyyaml": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "436": 29, "kb": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "268": 29, "typing_extens": 29, "filelock": 29, "42": 29, "61": 29, "2023": 29, "143": 29, "importlib": 29, "importlib_metadata": 29, "zipp": 29, "certifi": 29, "2017": 29, "idna": 29, "urllib3": 29, "chardet": 29, "pillow": 29, "29": 29, "cd59277de919": 29, "1b7cd0dc068f": 29, "27d618b9e7a2": 29, "33": 29, "50dd9580bcfb": 29, "50dd9580bcfb2fe652d2897cbb2b8f4b954454288d3bc644b45146bb7532773d": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "36": 29, "dgl1xfq9j5jbw": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40, 45], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": 43, "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_json_repr": 45, "json_repr": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_json_repr"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file