From 990581217f77f1699a58686840709f5138646812 Mon Sep 17 00:00:00 2001 From: TorchX CI Runner Date: Tue, 23 Jul 2024 16:57:07 +0000 Subject: [PATCH] [doc_push][main] built from 4081bcc (main). Redirects: main -> 0.8.0dev0. --- .../examples_apps_python.zip | Bin 31138 -> 31138 bytes .../examples_apps_jupyter.zip | Bin 39578 -> 39578 bytes .../examples_pipelines_python.zip | Bin 13790 -> 13790 bytes .../examples_pipelines_jupyter.zip | Bin 21485 -> 21485 bytes .../schedulers/kubernetes_scheduler.html | 30 +- 0.8.0dev0/custom_components.html | 315 +++++----- 0.8.0dev0/custom_components.ipynb | 547 +++++++----------- 0.8.0dev0/pipelines/airflow.html | 28 +- 0.8.0dev0/pipelines/airflow.ipynb | 68 +-- 0.8.0dev0/quickstart.html | 220 +++---- 0.8.0dev0/quickstart.ipynb | 396 ++++++------- 0.8.0dev0/searchindex.js | 2 +- 12 files changed, 747 insertions(+), 859 deletions(-) diff --git a/0.8.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip b/0.8.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip index aa234b6ab60b43609784499b7ec71e7f11900cb0..ba4bbf604ee7e483b4d6b4da0ec53bcde97d230c 100644 GIT binary patch delta 134 zcmZ4VnQ_r)M&1B#W)=|!5HN20zL9q!2eV1r_sttQgapBiW0I@cnN5KrC5k&mzzhSk zPm*AUYQPMBFylZxlLRwR_vEAHt{_d5c`G1vXoUkv!Q>egj$ryug$F7##06TUy ACjbBd diff --git a/0.8.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip b/0.8.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip index dc9dcd7dd4f8691864015b2ae4f8ea42e7f824e0..fbe2ab8b1baac4c7308a176b00ac27fdb60530f2 100644 GIT binary patch delta 134 zcmbQWm1)*iCf)#VW)=|!5HN20zL7V9huNg<`{n|k08udGuwnu?vnfzSP1i;e%t&=R zqzGncW!w}7GuUe8%P<3VPhK&@1Egv4XDDqq(;1{-a?4B?FnxNaJ(%X5OS7T)-0`$}9q899B%=1~b%jZ6v{rRJTKl zV1`!4O<^#Dt!BOqNY~^QGd#fbXCMvKJlSrhGnj6f=>n!t&$I{Ayt5p@bigbx0NMUE ATL1t6 diff --git a/0.8.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip b/0.8.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip index bc33923ebd21f0524ffb96c7dd826ed5e8ed2516..111f6b36ef6f7007b400f5f60c87d56ac929f4ff 100644 GIT binary patch delta 64 zcmcbYeJ`6gz?+#xgaHIh+rDq)J;lKcq&MH<;NS)`o(iidf*6x|Ozpw6pQ!_wo@nX| E0KH2V$N&HU delta 64 zcmcbYeJ`6gz?+#xgaHIZR(#vYdy0b@NN>Ky!NCn?JQY?^1TiM_nA(GBKT`)VJ<-$` E0KAJ9$N&HU diff --git a/0.8.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip b/0.8.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip index ba5a3d7ddae122d109cabfbede9ade5e7177e80b..47e5d7065b3d5cadb1930d930c34b6ed892c35f4 100644 GIT binary patch delta 66 zcmaF6obl~)M&1B#W)=|!5HM}~zLED2A2X2N%){>{2xf#T>@or|CJO~Sf$8vIXD~fC GH~;|TU>COl delta 66 zcmaF6obl~)M&1B#W)=|!5D;1MZ6og=K4u`jnTOv^5X=Zw*kuG_Ocn}u0@LBa&R}|O GZ~y?~pcl6Q diff --git a/0.8.0dev0/_modules/torchx/schedulers/kubernetes_scheduler.html b/0.8.0dev0/_modules/torchx/schedulers/kubernetes_scheduler.html index 6ce6b377b..b39689bb8 100644 --- a/0.8.0dev0/_modules/torchx/schedulers/kubernetes_scheduler.html +++ b/0.8.0dev0/_modules/torchx/schedulers/kubernetes_scheduler.html @@ -610,6 +610,17 @@

Source code for torchx.schedulers.kubernetes_scheduler

LABEL_INSTANCE_TYPE = "node.kubernetes.io/instance-type" +# role.env translates to static env variables in the yaml +# {"FOO" : "bar"} =====> - name: FOO +# value: bar +# unless this placeholder is present at the start of the role.env value then the env variable +# in the yaml will be dynamically populated at runtime (placeholder is stripped out of the value) +# {"FOO" : "[FIELD_PATH]bar"} =====> - name: FOO +# valueFrom: +# fieldRef: +# fieldPath: bar +PLACEHOLDER_FIELD_PATH = "[FIELD_PATH]" +
[docs]def sanitize_for_serialization(obj: object) -> object: from kubernetes import client @@ -624,7 +635,9 @@

Source code for torchx.schedulers.kubernetes_scheduler

V1ContainerPort, V1EmptyDirVolumeSource, V1EnvVar, + V1EnvVarSource, V1HostPathVolumeSource, + V1ObjectFieldSelector, V1ObjectMeta, V1PersistentVolumeClaimVolumeSource, V1Pod, @@ -744,9 +757,20 @@

Source code for torchx.schedulers.kubernetes_scheduler

image=role.image, name=name, env=[ - V1EnvVar( - name=name, - value=value, + ( + V1EnvVar( + name=name, + value_from=V1EnvVarSource( + field_ref=V1ObjectFieldSelector( + field_path=value.strip(PLACEHOLDER_FIELD_PATH) + ) + ), + ) + if value.startswith(PLACEHOLDER_FIELD_PATH) + else V1EnvVar( + name=name, + value=value, + ) ) for name, value in role.env.items() ], diff --git a/0.8.0dev0/custom_components.html b/0.8.0dev0/custom_components.html index ec523be11..0fa45ee64 100644 --- a/0.8.0dev0/custom_components.html +++ b/0.8.0dev0/custom_components.html @@ -538,12 +538,12 @@

Hello World
-torchx 2024-07-22 21:00:43 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:00:43 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-torchx 2024-07-22 21:00:43 INFO     Log directory is: /tmp/torchx_8tuc8ai0
-torchx 2024-07-22 21:00:43 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:49:44 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:49:44 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+torchx 2024-07-23 16:49:44 INFO     Log directory is: /tmp/torchx_kc6luplh
+torchx 2024-07-23 16:49:44 INFO     Waiting for the app to finish...
 greeter/0 Hello, your name!
-torchx 2024-07-22 21:00:44 INFO     Job finished: SUCCEEDED
+torchx 2024-07-23 16:49:45 INFO     Job finished: SUCCEEDED
 

If we want to run in other environments, we can build a Docker container so we can run our component in Docker enabled environments such as Kubernetes or via the local Docker scheduler.

@@ -595,11 +595,12 @@

Hello World
-torchx 2024-07-22 21:02:34 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:02:34 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-07-22 21:02:34 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-07-22 21:02:35 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-07-22 21:02:36 WARNING  failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found ("pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied")
-torchx 2024-07-22 21:02:36 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-07-22 21:02:36 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-07-22 21:02:36 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-07-22 21:02:36 INFO      ---> 6894810b9995
-torchx 2024-07-22 21:02:36 INFO     Step 3/4 : COPY . .
-torchx 2024-07-22 21:02:40 INFO      ---> 056e890be821
-torchx 2024-07-22 21:02:40 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-07-22 21:02:40 INFO      ---> Running in 141e2a260a4b
-torchx 2024-07-22 21:02:44 INFO      ---> Removed intermediate container 141e2a260a4b
-torchx 2024-07-22 21:02:44 INFO      ---> 523f4054b4be
-torchx 2024-07-22 21:02:44 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-07-22 21:02:44 INFO     Successfully built 523f4054b4be
-torchx 2024-07-22 21:02:44 INFO     Built new image `sha256:523f4054b4be56bc1c1baea10daa9d9516480b017a3160a430061489a81f47a5` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.
-torchx 2024-07-22 21:02:45 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:51:29 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:51:29 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-07-23 16:51:29 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-07-23 16:51:30 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-07-23 16:51:30 WARNING  failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found ("pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied")
+torchx 2024-07-23 16:51:30 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-07-23 16:51:30 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-07-23 16:51:30 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-07-23 16:51:30 INFO      ---> 59cbabfc4bc4
+torchx 2024-07-23 16:51:30 INFO     Step 3/4 : COPY . .
+torchx 2024-07-23 16:51:34 INFO      ---> f0106ecc69d3
+torchx 2024-07-23 16:51:34 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-07-23 16:51:34 INFO      ---> Running in d485ca6a242a
+torchx 2024-07-23 16:51:38 INFO      ---> Removed intermediate container d485ca6a242a
+torchx 2024-07-23 16:51:38 INFO      ---> 1cfc2f00125a
+torchx 2024-07-23 16:51:38 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-07-23 16:51:38 INFO     Successfully built 1cfc2f00125a
+torchx 2024-07-23 16:51:38 INFO     Built new image `sha256:1cfc2f00125a37863f8127231761aea50b5299c8ce40597b9bdff1607319d5f4` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.
+torchx 2024-07-23 16:51:41 INFO     Waiting for the app to finish...
 greeter/0 Hello, your name!
-torchx 2024-07-22 21:02:46 INFO     Job finished: SUCCEEDED
+torchx 2024-07-23 16:51:42 INFO     Job finished: SUCCEEDED
 

If you have a Kubernetes cluster you can use the Kubernetes scheduler to launch this on the cluster instead.

@@ -853,25 +836,25 @@

Builtins
-torchx 2024-07-22 21:02:48 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:02:48 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-07-22 21:02:48 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-07-22 21:02:48 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-07-22 21:05:34 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-07-22 21:05:34 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-07-22 21:05:34 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-07-22 21:05:34 INFO      ---> dced593a08fb
-torchx 2024-07-22 21:05:34 INFO     Step 3/4 : COPY . .
-torchx 2024-07-22 21:05:41 INFO      ---> 2e0bbafaa42b
-torchx 2024-07-22 21:05:41 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-07-22 21:05:41 INFO      ---> Running in e9ca0762c4a5
-torchx 2024-07-22 21:05:48 INFO      ---> Removed intermediate container e9ca0762c4a5
-torchx 2024-07-22 21:05:48 INFO      ---> 05956815667f
-torchx 2024-07-22 21:05:48 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-07-22 21:05:48 INFO     Successfully built 05956815667f
-torchx 2024-07-22 21:05:48 INFO     Built new image `sha256:05956815667f37fa6a1a6c84b0766e5df954714086c917274691e8e9d04a2820` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.
-torchx 2024-07-22 21:05:48 INFO     Waiting for the app to finish...
-torchx 2024-07-22 21:05:49 INFO     Job finished: SUCCEEDED
+torchx 2024-07-23 16:51:44 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:51:44 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-07-23 16:51:44 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-07-23 16:51:44 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-07-23 16:54:06 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-07-23 16:54:06 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-07-23 16:54:06 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-07-23 16:54:06 INFO      ---> 57dff228bc2f
+torchx 2024-07-23 16:54:06 INFO     Step 3/4 : COPY . .
+torchx 2024-07-23 16:54:13 INFO      ---> 6c9063465dd1
+torchx 2024-07-23 16:54:13 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-07-23 16:54:13 INFO      ---> Running in fd424fee4fb1
+torchx 2024-07-23 16:54:20 INFO      ---> Removed intermediate container fd424fee4fb1
+torchx 2024-07-23 16:54:20 INFO      ---> 94146e13e9a6
+torchx 2024-07-23 16:54:20 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-07-23 16:54:20 INFO     Successfully built 94146e13e9a6
+torchx 2024-07-23 16:54:20 INFO     Built new image `sha256:94146e13e9a69197f4f54316c8bed7d2cdf6ffcc6607a935eb7a316c1c5629b5` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.
+torchx 2024-07-23 16:54:20 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:54:20 INFO     Job finished: SUCCEEDED
 echo/0 Hello :)
 
@@ -880,7 +863,7 @@

Builtins
-local_docker://torchx/echo-wffl2x7dd7vsdc
+local_docker://torchx/echo-qv026x0nr27f0c
 
diff --git a/0.8.0dev0/custom_components.ipynb b/0.8.0dev0/custom_components.ipynb index 34976a0be..0a6ba6721 100644 --- a/0.8.0dev0/custom_components.ipynb +++ b/0.8.0dev0/custom_components.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "6f0a0c8c", + "id": "821e81d1", "metadata": {}, "source": [ "# Custom Components\n", @@ -27,13 +27,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "ca20ede8", + "id": "377d91ec", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:00:42.865513Z", - "iopub.status.busy": "2024-07-22T21:00:42.865298Z", - "iopub.status.idle": "2024-07-22T21:00:42.873067Z", - "shell.execute_reply": "2024-07-22T21:00:42.872444Z" + "iopub.execute_input": "2024-07-23T16:49:43.937882Z", + "iopub.status.busy": "2024-07-23T16:49:43.937694Z", + "iopub.status.idle": "2024-07-23T16:49:43.945183Z", + "shell.execute_reply": "2024-07-23T16:49:43.944548Z" } }, "outputs": [ @@ -71,7 +71,7 @@ }, { "cell_type": "markdown", - "id": "57b74805", + "id": "e753d5c0", "metadata": {}, "source": [ "Now that we have an app we can write the component file for it. This\n", @@ -84,13 +84,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "9e15d6ff", + "id": "c98098b8", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:00:42.875669Z", - "iopub.status.busy": "2024-07-22T21:00:42.875214Z", - "iopub.status.idle": "2024-07-22T21:00:42.879018Z", - "shell.execute_reply": "2024-07-22T21:00:42.878395Z" + "iopub.execute_input": "2024-07-23T16:49:43.947927Z", + "iopub.status.busy": "2024-07-23T16:49:43.947463Z", + "iopub.status.idle": "2024-07-23T16:49:43.951328Z", + "shell.execute_reply": "2024-07-23T16:49:43.950675Z" } }, "outputs": [ @@ -126,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "9f44dea9", + "id": "5260e625", "metadata": {}, "source": [ "We can execute our component via `torchx run`. The\n", @@ -136,13 +136,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "ed76e7b3", + "id": "e0383eac", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:00:42.881528Z", - "iopub.status.busy": "2024-07-22T21:00:42.880944Z", - "iopub.status.idle": "2024-07-22T21:00:44.261391Z", - "shell.execute_reply": "2024-07-22T21:00:44.260754Z" + "iopub.execute_input": "2024-07-23T16:49:43.953824Z", + "iopub.status.busy": "2024-07-23T16:49:43.953404Z", + "iopub.status.idle": "2024-07-23T16:49:45.326450Z", + "shell.execute_reply": "2024-07-23T16:49:45.325811Z" } }, "outputs": [ @@ -150,28 +150,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:00:43 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:49:44 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:00:43 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-07-23 16:49:44 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:00:43 INFO Log directory is: /tmp/torchx_8tuc8ai0\n" + "torchx 2024-07-23 16:49:44 INFO Log directory is: /tmp/torchx_kc6luplh\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:00:43 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:49:44 INFO Waiting for the app to finish...\n" ] }, { @@ -185,14 +185,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:00:44 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:49:45 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/hello_world-sw1wqcq96tsdg\n" + "local_cwd://torchx/hello_world-vmjbbm6c3jv2kc\n" ] } ], @@ -203,7 +203,7 @@ }, { "cell_type": "markdown", - "id": "7e1a772f", + "id": "beae8eb0", "metadata": {}, "source": [ "If we want to run in other environments, we can build a Docker container so we\n", @@ -221,13 +221,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "82fa110a", + "id": "34e103ae", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:00:44.264275Z", - "iopub.status.busy": "2024-07-22T21:00:44.263797Z", - "iopub.status.idle": "2024-07-22T21:00:44.268052Z", - "shell.execute_reply": "2024-07-22T21:00:44.267404Z" + "iopub.execute_input": "2024-07-23T16:49:45.329247Z", + "iopub.status.busy": "2024-07-23T16:49:45.328839Z", + "iopub.status.idle": "2024-07-23T16:49:45.332999Z", + "shell.execute_reply": "2024-07-23T16:49:45.332327Z" } }, "outputs": [ @@ -249,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "ce888ce1", + "id": "f0bbc076", "metadata": {}, "source": [ "Once we have the Dockerfile created we can create our docker image." @@ -258,13 +258,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "ab19f483", + "id": "eda5af98", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:00:44.270454Z", - "iopub.status.busy": "2024-07-22T21:00:44.270159Z", - "iopub.status.idle": "2024-07-22T21:02:33.771242Z", - "shell.execute_reply": "2024-07-22T21:02:33.770577Z" + "iopub.execute_input": "2024-07-23T16:49:45.335534Z", + "iopub.status.busy": "2024-07-23T16:49:45.335077Z", + "iopub.status.idle": "2024-07-23T16:51:28.880118Z", + "shell.execute_reply": "2024-07-23T16:51:28.879458Z" } }, "outputs": [ @@ -293,14 +293,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#1 transferring dockerfile: 99B done\n" + "#1 transferring dockerfile: 36B 0.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#1 DONE 0.0s\n" + "#1 transferring dockerfile: 99B 0.1s done\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "#1 DONE 0.1s\n" ] }, { @@ -321,7 +328,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#2 DONE 0.7s\n" + "#2 DONE 0.4s\n" ] }, { @@ -405,21 +412,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3 3.25kB / 3.25kB done\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 4.19MB / 26.70MB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 0B / 857B 0.1s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 0B / 189B 0.1s\n" + "#5 sha256:a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3 3.25kB / 3.25kB done\n" ] }, { @@ -433,693 +433,567 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0B / 26.70MB 0.1s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.2s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0B / 9.94MB 0.2s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.2s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 2.10MB / 26.70MB 0.3s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0B / 2.00GB 0.3s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 15.73MB / 26.70MB 0.5s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 8.39MB / 9.94MB 0.5s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 23.07MB / 26.70MB 0.6s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.5s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 0B / 132B 0.6s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.7s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0.1s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0B / 21.46MB 0.7s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.7s done\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 0B / 257B 0.8s\n" + "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 1.0s done\n" + "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0B / 9.94MB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 0B / 1.71GB 1.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0B / 2.00GB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0.8s done\n" + "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 done\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 20.97MB / 26.70MB 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 101.71MB / 2.00GB 2.0s\n" + "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 0B / 132B 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.1s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 2.10MB / 21.46MB 2.2s\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.3s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.6s done\n" + "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 0B / 257B 0.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 88.15MB / 1.71GB 2.8s\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 1.05MB / 21.46MB 0.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 4.19MB / 21.46MB 3.5s\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 208.67MB / 2.00GB 3.6s\n" + "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 0.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 185.60MB / 1.71GB 4.2s\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 12.58MB / 21.46MB 0.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 6.29MB / 21.46MB 4.6s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 0B / 1.71GB 0.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 314.57MB / 2.00GB 5.2s\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 275.78MB / 1.71GB 5.5s\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 0.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 8.39MB / 21.46MB 5.6s\n" + "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 0.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 10.49MB / 21.46MB 6.5s\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 0.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 421.53MB / 2.00GB 6.8s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 0.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 367.00MB / 1.71GB 6.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 109.20MB / 2.00GB 0.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 12.58MB / 21.46MB 7.4s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 25.17MB / 341.29MB 0.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 457.18MB / 1.71GB 8.1s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 50.33MB / 341.29MB 1.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 14.68MB / 21.46MB 8.2s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 211.81MB / 2.00GB 1.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 528.48MB / 2.00GB 8.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 100.66MB / 1.71GB 1.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 16.78MB / 21.46MB 9.0s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 76.73MB / 341.29MB 1.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 547.36MB / 1.71GB 9.4s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 96.47MB / 341.29MB 1.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 18.87MB / 21.46MB 9.8s\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 1.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 634.39MB / 2.00GB 10.0s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 115.34MB / 341.29MB 1.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 20.97MB / 21.46MB 10.4s\n" + "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 10.5s done\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 146.80MB / 341.29MB 1.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 0B / 92B 10.6s\n" + "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 0.0s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 636.49MB / 1.71GB 10.7s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 337.64MB / 2.00GB 2.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 10.8s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 213.91MB / 1.71GB 2.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 10.9s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 192.94MB / 341.29MB 2.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 11.0s done\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 249.56MB / 341.29MB 2.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 11.1s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 312.48MB / 1.71GB 2.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 741.34MB / 2.00GB 11.6s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 314.57MB / 341.29MB 2.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 29.36MB / 341.29MB 11.9s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 333.45MB / 341.29MB 2.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 727.71MB / 1.71GB 12.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 469.76MB / 2.00GB 3.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 50.33MB / 341.29MB 12.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 422.58MB / 1.71GB 3.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 71.30MB / 341.29MB 12.5s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 583.01MB / 2.00GB 3.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 92.27MB / 341.29MB 12.8s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 3.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 113.25MB / 341.29MB 13.1s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 3.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 847.25MB / 2.00GB 13.2s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 3.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 817.89MB / 1.71GB 13.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 511.71MB / 1.71GB 4.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 134.25MB / 341.29MB 13.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 688.27MB / 2.00GB 4.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 156.24MB / 341.29MB 13.7s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 4.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 177.21MB / 341.29MB 14.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 830.47MB / 2.00GB 5.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 205.52MB / 341.29MB 14.4s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 907.02MB / 1.71GB 14.7s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 226.49MB / 341.29MB 14.7s\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 954.20MB / 2.00GB 14.8s\n" + "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 247.46MB / 341.29MB 15.0s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 0B / 556.96kB 5.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 275.78MB / 341.29MB 15.4s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 5.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 297.80MB / 341.29MB 15.8s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 603.98MB / 1.71GB 5.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 317.72MB / 341.29MB 16.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 940.57MB / 2.00GB 5.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 998.24MB / 1.71GB 16.2s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 5.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.05GB / 2.00GB 16.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 699.26MB / 1.71GB 6.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 339.74MB / 341.29MB 16.3s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.04GB / 2.00GB 6.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.08GB / 1.71GB 17.5s\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 18.1s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 787.48MB / 1.71GB 6.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 18.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.14GB / 2.00GB 6.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.16GB / 2.00GB 18.2s\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 18.3s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 889.19MB / 1.71GB 7.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 0B / 556.96kB 18.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.25GB / 2.00GB 7.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.18GB / 1.71GB 18.8s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 981.47MB / 1.71GB 8.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 18.7s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.35GB / 2.00GB 8.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.26GB / 2.00GB 19.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.09GB / 1.71GB 8.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.27GB / 1.71GB 20.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.46GB / 2.00GB 9.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.37GB / 2.00GB 21.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.20GB / 1.71GB 9.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.36GB / 1.71GB 21.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.57GB / 2.00GB 10.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.45GB / 1.71GB 22.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.28GB / 1.71GB 10.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.47GB / 2.00GB 22.8s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.38GB / 1.71GB 10.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.55GB / 1.71GB 24.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.67GB / 2.00GB 10.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.58GB / 2.00GB 24.4s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.78GB / 2.00GB 11.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.64GB / 1.71GB 25.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.48GB / 1.71GB 11.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.68GB / 2.00GB 25.9s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.89GB / 2.00GB 11.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.78GB / 2.00GB 27.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.58GB / 1.71GB 12.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 28.1s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 12.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.89GB / 2.00GB 29.0s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.68GB / 1.71GB 12.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 30.6s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 13.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 32.5s done\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0.1s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 15.2s done\n" ] }, { @@ -1133,7 +1007,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 10.2s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 10.1s\n" ] }, { @@ -1154,21 +1028,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 25.3s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 25.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.3s\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.5s done\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 31.0s done\n" ] }, { @@ -1196,7 +1070,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.8s done\n" + "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.7s done\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "#5 extracting sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de\n" ] }, { @@ -1210,35 +1091,35 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 5.0s\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 5.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 10.1s\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 10.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 15.1s\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 15.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 17.4s done\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 17.5s done\n" ] }, { @@ -1273,21 +1154,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 6.1s\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 5.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 11.2s\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 10.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 13.2s done\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 13.3s done\n" ] }, { @@ -1322,7 +1203,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 DONE 100.4s\n" + "#5 DONE 83.7s\n" ] }, { @@ -1371,14 +1252,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#7 exporting layers 4.0s done\n" + "#7 exporting layers 4.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#7 writing image sha256:6894810b99959b068fed4046cd9c709a785a92d740fbbc85f8b5e485bffc9500 done\n" + "#7 writing image sha256:59cbabfc4bc4273f19893e6f5be46e117b968e6aa345e1004ca8a4ae7bd1650c done\n" ] }, { @@ -1392,7 +1273,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#7 DONE 4.0s\n" + "#7 DONE 4.1s\n" ] } ], @@ -1403,7 +1284,7 @@ }, { "cell_type": "markdown", - "id": "797620f5", + "id": "6aabdf26", "metadata": {}, "source": [ "We can then launch it on the local scheduler." @@ -1412,13 +1293,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "77e00b3d", + "id": "296c094b", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:02:33.774133Z", - "iopub.status.busy": "2024-07-22T21:02:33.773723Z", - "iopub.status.idle": "2024-07-22T21:02:46.419243Z", - "shell.execute_reply": "2024-07-22T21:02:46.418597Z" + "iopub.execute_input": "2024-07-23T16:51:28.882967Z", + "iopub.status.busy": "2024-07-23T16:51:28.882630Z", + "iopub.status.idle": "2024-07-23T16:51:42.826016Z", + "shell.execute_reply": "2024-07-23T16:51:42.825348Z" } }, "outputs": [ @@ -1426,133 +1307,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:34 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:51:29 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:34 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-07-23 16:51:29 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:34 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-07-23 16:51:29 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:35 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-07-23 16:51:30 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" + "torchx 2024-07-23 16:51:30 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.45/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-07-23 16:51:30 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-07-23 16:51:30 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-07-23 16:51:30 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 INFO ---> 6894810b9995\n" + "torchx 2024-07-23 16:51:30 INFO ---> 59cbabfc4bc4\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:36 INFO Step 3/4 : COPY . .\n" + "torchx 2024-07-23 16:51:30 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:40 INFO ---> 056e890be821\n" + "torchx 2024-07-23 16:51:34 INFO ---> f0106ecc69d3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:40 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-07-23 16:51:34 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:40 INFO ---> Running in 141e2a260a4b\n" + "torchx 2024-07-23 16:51:34 INFO ---> Running in d485ca6a242a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:44 INFO ---> Removed intermediate container 141e2a260a4b\n" + "torchx 2024-07-23 16:51:38 INFO ---> Removed intermediate container d485ca6a242a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:44 INFO ---> 523f4054b4be\n" + "torchx 2024-07-23 16:51:38 INFO ---> 1cfc2f00125a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:44 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-07-23 16:51:38 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:44 INFO Successfully built 523f4054b4be\n" + "torchx 2024-07-23 16:51:38 INFO Successfully built 1cfc2f00125a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:44 INFO Built new image `sha256:523f4054b4be56bc1c1baea10daa9d9516480b017a3160a430061489a81f47a5` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" + "torchx 2024-07-23 16:51:38 INFO Built new image `sha256:1cfc2f00125a37863f8127231761aea50b5299c8ce40597b9bdff1607319d5f4` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:45 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:51:41 INFO Waiting for the app to finish...\n" ] }, { @@ -1566,14 +1447,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:46 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:51:42 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/hello_world-fzgbvxbtf7xqsd\n" + "local_docker://torchx/hello_world-nmczf0v3x19t0\n" ] } ], @@ -1584,7 +1465,7 @@ }, { "cell_type": "markdown", - "id": "fb0e0e17", + "id": "cd364098", "metadata": {}, "source": [ "If you have a Kubernetes cluster you can use the [Kubernetes scheduler](schedulers/kubernetes.rst) to launch\n", @@ -1593,7 +1474,7 @@ }, { "cell_type": "markdown", - "id": "b18da2b1", + "id": "cc61e05b", "metadata": { "lines_to_next_cell": 2, "region_name": "md" @@ -1607,7 +1488,7 @@ }, { "cell_type": "markdown", - "id": "1fed9f6a", + "id": "923766e0", "metadata": {}, "source": [ "## Builtins\n", @@ -1619,13 +1500,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "8fa0f7ba", + "id": "c9e5a188", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:02:46.422414Z", - "iopub.status.busy": "2024-07-22T21:02:46.421882Z", - "iopub.status.idle": "2024-07-22T21:02:47.410878Z", - "shell.execute_reply": "2024-07-22T21:02:47.410231Z" + "iopub.execute_input": "2024-07-23T16:51:42.828850Z", + "iopub.status.busy": "2024-07-23T16:51:42.828576Z", + "iopub.status.idle": "2024-07-23T16:51:43.793578Z", + "shell.execute_reply": "2024-07-23T16:51:43.792918Z" } }, "outputs": [ @@ -1721,7 +1602,7 @@ }, { "cell_type": "markdown", - "id": "35abaf05", + "id": "5d29fa71", "metadata": {}, "source": [ "You can use these either from the CLI, from a pipeline or programmatically like\n", @@ -1731,13 +1612,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "a771f0ab", + "id": "9213b985", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:02:47.413454Z", - "iopub.status.busy": "2024-07-22T21:02:47.413193Z", - "iopub.status.idle": "2024-07-22T21:05:49.069347Z", - "shell.execute_reply": "2024-07-22T21:05:49.068666Z" + "iopub.execute_input": "2024-07-23T16:51:43.796218Z", + "iopub.status.busy": "2024-07-23T16:51:43.795996Z", + "iopub.status.idle": "2024-07-23T16:54:20.518943Z", + "shell.execute_reply": "2024-07-23T16:54:20.518130Z" } }, "outputs": [ @@ -1745,133 +1626,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:48 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:51:44 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:48 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-07-23 16:51:44 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:48 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-07-23 16:51:44 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:02:48 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-07-23 16:51:44 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:34 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-07-23 16:54:06 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:34 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-07-23 16:54:06 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:34 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-07-23 16:54:06 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:34 INFO ---> dced593a08fb\n" + "torchx 2024-07-23 16:54:06 INFO ---> 57dff228bc2f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:34 INFO Step 3/4 : COPY . .\n" + "torchx 2024-07-23 16:54:06 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:41 INFO ---> 2e0bbafaa42b\n" + "torchx 2024-07-23 16:54:13 INFO ---> 6c9063465dd1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:41 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-07-23 16:54:13 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:41 INFO ---> Running in e9ca0762c4a5\n" + "torchx 2024-07-23 16:54:13 INFO ---> Running in fd424fee4fb1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO ---> Removed intermediate container e9ca0762c4a5\n" + "torchx 2024-07-23 16:54:20 INFO ---> Removed intermediate container fd424fee4fb1\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO ---> 05956815667f\n" + "torchx 2024-07-23 16:54:20 INFO ---> 94146e13e9a6\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-07-23 16:54:20 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO Successfully built 05956815667f\n" + "torchx 2024-07-23 16:54:20 INFO Successfully built 94146e13e9a6\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO Built new image `sha256:05956815667f37fa6a1a6c84b0766e5df954714086c917274691e8e9d04a2820` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" + "torchx 2024-07-23 16:54:20 INFO Built new image `sha256:94146e13e9a69197f4f54316c8bed7d2cdf6ffcc6607a935eb7a316c1c5629b5` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:48 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:54:20 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:49 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:54:20 INFO Job finished: SUCCEEDED\n" ] }, { @@ -1885,7 +1766,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/echo-wffl2x7dd7vsdc\n" + "local_docker://torchx/echo-qv026x0nr27f0c\n" ] } ], diff --git a/0.8.0dev0/pipelines/airflow.html b/0.8.0dev0/pipelines/airflow.html index b14bd9c2e..4592f9fd9 100644 --- a/0.8.0dev0/pipelines/airflow.html +++ b/0.8.0dev0/pipelines/airflow.html @@ -529,7 +529,7 @@

Airflow
-
/tmp/ipykernel_4745/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
+
/tmp/ipykernel_4683/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
 
@@ -537,20 +537,20 @@

Airflow
-[2024-07-22T21:05:52.449+0000] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti=<TaskInstance: example_python_operator-lczkcgvjlhm4gc.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
-[2024-07-22T21:05:52.455+0000] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti=<TaskInstance: example_python_operator-lczkcgvjlhm4gc.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
-[2024-07-22T21:05:52.455+0000] {taskinstance.py:2306} INFO - Starting attempt 1 of 1
-[2024-07-22T21:05:52.456+0000] {taskinstance.py:2388} WARNING - cannot record queued_duration for task hello_torchx because previous state change time has not been saved
-[2024-07-22T21:05:52.465+0000] {taskinstance.py:2330} INFO - Executing <Task(_PythonDecoratedOperator): hello_torchx> on 2021-09-13 00:00:00+00:00
-[2024-07-22T21:05:52.719+0000] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-lczkcgvjlhm4gc' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'
-[2024-07-22T21:05:52.722+0000] {taskinstance.py:430} INFO - ::endgroup::
-[2024-07-22T21:05:53.410+0000] {api.py:72} INFO - Tracker configurations: {}
-[2024-07-22T21:05:53.414+0000] {local_scheduler.py:771} INFO - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-[2024-07-22T21:05:53.415+0000] {local_scheduler.py:777} INFO - Log directory is: /tmp/torchx_ljdlx37v
+[2024-07-23T16:54:23.950+0000] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=non-requeueable deps ti=<TaskInstance: example_python_operator-vxmc43hj9dj2g.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
+[2024-07-23T16:54:23.955+0000] {taskinstance.py:2076} INFO - Dependencies all met for dep_context=requeueable deps ti=<TaskInstance: example_python_operator-vxmc43hj9dj2g.hello_torchx manual__2021-09-13T00:00:00+00:00 [None]>
+[2024-07-23T16:54:23.956+0000] {taskinstance.py:2306} INFO - Starting attempt 1 of 1
+[2024-07-23T16:54:23.957+0000] {taskinstance.py:2388} WARNING - cannot record queued_duration for task hello_torchx because previous state change time has not been saved
+[2024-07-23T16:54:23.966+0000] {taskinstance.py:2330} INFO - Executing <Task(_PythonDecoratedOperator): hello_torchx> on 2021-09-13 00:00:00+00:00
+[2024-07-23T16:54:24.227+0000] {taskinstance.py:2648} INFO - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-vxmc43hj9dj2g' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'
+[2024-07-23T16:54:24.230+0000] {taskinstance.py:430} INFO - ::endgroup::
+[2024-07-23T16:54:24.924+0000] {api.py:72} INFO - Tracker configurations: {}
+[2024-07-23T16:54:24.927+0000] {local_scheduler.py:771} INFO - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+[2024-07-23T16:54:24.928+0000] {local_scheduler.py:777} INFO - Log directory is: /tmp/torchx_wh8khrqb
 Hello, TorchX!
-[2024-07-22T21:05:53.523+0000] {python.py:237} INFO - Done. Returned value was: None
-[2024-07-22T21:05:53.524+0000] {taskinstance.py:441} INFO - ::group::Post task execution logs
-[2024-07-22T21:05:53.529+0000] {taskinstance.py:1206} INFO - Marking task as SUCCESS. dag_id=example_python_operator-lczkcgvjlhm4gc, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240722T210552, end_date=20240722T210553
+[2024-07-23T16:54:25.036+0000] {python.py:237} INFO - Done. Returned value was: None
+[2024-07-23T16:54:25.037+0000] {taskinstance.py:441} INFO - ::group::Post task execution logs
+[2024-07-23T16:54:25.042+0000] {taskinstance.py:1206} INFO - Marking task as SUCCESS. dag_id=example_python_operator-vxmc43hj9dj2g, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240723T165423, end_date=20240723T165425
 

If all goes well you should see Hello, TorchX! printed above.

diff --git a/0.8.0dev0/pipelines/airflow.ipynb b/0.8.0dev0/pipelines/airflow.ipynb index a0cd25e3a..00e4c8e18 100644 --- a/0.8.0dev0/pipelines/airflow.ipynb +++ b/0.8.0dev0/pipelines/airflow.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "0a17b761", + "id": "f879c95a", "metadata": {}, "source": [ "# Airflow\n", @@ -18,13 +18,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "6542b900", + "id": "546f0ed6", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:51.615210Z", - "iopub.status.busy": "2024-07-22T21:05:51.614812Z", - "iopub.status.idle": "2024-07-22T21:05:52.232110Z", - "shell.execute_reply": "2024-07-22T21:05:52.231543Z" + "iopub.execute_input": "2024-07-23T16:54:23.129907Z", + "iopub.status.busy": "2024-07-23T16:54:23.129682Z", + "iopub.status.idle": "2024-07-23T16:54:23.732699Z", + "shell.execute_reply": "2024-07-23T16:54:23.732038Z" } }, "outputs": [], @@ -44,7 +44,7 @@ }, { "cell_type": "markdown", - "id": "ae2c469a", + "id": "71b094c2", "metadata": {}, "source": [ "To launch a TorchX job from Airflow you can create a Airflow Python task to\n", @@ -56,13 +56,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "3d6cfa27", + "id": "20f71c19", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:52.235179Z", - "iopub.status.busy": "2024-07-22T21:05:52.234728Z", - "iopub.status.idle": "2024-07-22T21:05:52.239630Z", - "shell.execute_reply": "2024-07-22T21:05:52.238972Z" + "iopub.execute_input": "2024-07-23T16:54:23.735887Z", + "iopub.status.busy": "2024-07-23T16:54:23.735385Z", + "iopub.status.idle": "2024-07-23T16:54:23.740145Z", + "shell.execute_reply": "2024-07-23T16:54:23.739508Z" } }, "outputs": [], @@ -93,7 +93,7 @@ }, { "cell_type": "markdown", - "id": "0579604d", + "id": "52c86120", "metadata": {}, "source": [ "Once we have the task defined we can put it into a Airflow DAG and run it like\n", @@ -103,24 +103,24 @@ { "cell_type": "code", "execution_count": 3, - "id": "c7c63de9", + "id": "cd983551", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:52.241861Z", - "iopub.status.busy": "2024-07-22T21:05:52.241668Z", - "iopub.status.idle": "2024-07-22T21:05:53.537490Z", - "shell.execute_reply": "2024-07-22T21:05:53.536920Z" + "iopub.execute_input": "2024-07-23T16:54:23.742636Z", + "iopub.status.busy": "2024-07-23T16:54:23.742234Z", + "iopub.status.idle": "2024-07-23T16:54:25.050516Z", + "shell.execute_reply": "2024-07-23T16:54:25.049946Z" } }, "outputs": [ { "data": { "text/html": [ - "
/tmp/ipykernel_4745/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
+       "
/tmp/ipykernel_4683/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
        "
\n" ], "text/plain": [ - "\u001b[1;33m/tmp/ipykernel_4745/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" + "\u001b[1;33m/tmp/ipykernel_4683/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" ] }, "metadata": {}, @@ -130,70 +130,70 @@ "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.449+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2076} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:23.950+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2076} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.455+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2076} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:23.955+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2076} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.455+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2306} INFO\u001b[0m - Starting attempt 1 of 1\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:23.956+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2306} INFO\u001b[0m - Starting attempt 1 of 1\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.456+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2388} WARNING\u001b[0m - cannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:23.957+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2388} WARNING\u001b[0m - cannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.465+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2330} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:23.966+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2330} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.719+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2648} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-lczkcgvjlhm4gc' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:24.227+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2648} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-vxmc43hj9dj2g' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:52.722+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m430} INFO\u001b[0m - ::endgroup::\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:24.230+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m430} INFO\u001b[0m - ::endgroup::\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:53.410+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:24.924+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:53.414+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:24.927+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:53.415+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_ljdlx37v\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:24.928+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_wh8khrqb\u001b[0m\n" ] }, { @@ -201,21 +201,21 @@ "output_type": "stream", "text": [ "Hello, TorchX!\n", - "[\u001b[34m2024-07-22T21:05:53.523+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m237} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:25.036+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m237} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:53.524+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m441} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:25.037+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m441} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-07-22T21:05:53.529+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m1206} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-lczkcgvjlhm4gc, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240722T210552, end_date=20240722T210553\u001b[0m\n" + "[\u001b[34m2024-07-23T16:54:25.042+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m1206} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-vxmc43hj9dj2g, task_id=hello_torchx, run_id=manual__2021-09-13T00:00:00+00:00, execution_date=20210913T000000, start_date=20240723T165423, end_date=20240723T165425\u001b[0m\n" ] } ], @@ -247,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "a011ff97", + "id": "a446901f", "metadata": {}, "source": [ "If all goes well you should see `Hello, TorchX!` printed above.\n", diff --git a/0.8.0dev0/quickstart.html b/0.8.0dev0/quickstart.html index 3a2aa681a..10a2e95ec 100644 --- a/0.8.0dev0/quickstart.html +++ b/0.8.0dev0/quickstart.html @@ -582,12 +582,12 @@

Launching
-torchx 2024-07-22 21:05:57 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:05:57 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
-torchx 2024-07-22 21:05:57 INFO     Log directory is: /tmp/torchx_57w4cxsf
-torchx 2024-07-22 21:05:57 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:54:29 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:54:29 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+torchx 2024-07-23 16:54:29 INFO     Log directory is: /tmp/torchx_jnu_js7p
+torchx 2024-07-23 16:54:29 INFO     Waiting for the app to finish...
 python/0 Hello, your name!
-torchx 2024-07-22 21:05:58 INFO     Job finished: SUCCEEDED
+torchx 2024-07-23 16:54:30 INFO     Job finished: SUCCEEDED
 

We can run the exact same app via the local_docker scheduler. This scheduler will package up the local workspace as a layer on top of the specified image. This provides a very similar environment to the container based remote schedulers.

@@ -615,26 +615,26 @@

Launching
-torchx 2024-07-22 21:05:59 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:05:59 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-07-22 21:05:59 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-07-22 21:06:00 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-07-22 21:06:00 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-07-22 21:06:00 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-07-22 21:06:00 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-07-22 21:06:00 INFO      ---> dced593a08fb
-torchx 2024-07-22 21:06:00 INFO     Step 3/4 : COPY . .
-torchx 2024-07-22 21:06:07 INFO      ---> b00c44e391a6
-torchx 2024-07-22 21:06:07 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-07-22 21:06:07 INFO      ---> Running in 85fe6ef573d4
-torchx 2024-07-22 21:06:14 INFO      ---> Removed intermediate container 85fe6ef573d4
-torchx 2024-07-22 21:06:14 INFO      ---> c8c3e67341ab
-torchx 2024-07-22 21:06:14 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-07-22 21:06:14 INFO     Successfully built c8c3e67341ab
-torchx 2024-07-22 21:06:14 INFO     Built new image `sha256:c8c3e67341aba572d62b3d355b753f405de428ef3ae19c699b34ebad7b0aaefd` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
-torchx 2024-07-22 21:06:14 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:54:31 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:54:31 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-07-23 16:54:31 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-07-23 16:54:31 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-07-23 16:54:31 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-07-23 16:54:31 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-07-23 16:54:31 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-07-23 16:54:31 INFO      ---> 57dff228bc2f
+torchx 2024-07-23 16:54:31 INFO     Step 3/4 : COPY . .
+torchx 2024-07-23 16:54:38 INFO      ---> 057c5a4a4d4c
+torchx 2024-07-23 16:54:38 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-07-23 16:54:38 INFO      ---> Running in 83919021be0b
+torchx 2024-07-23 16:54:45 INFO      ---> Removed intermediate container 83919021be0b
+torchx 2024-07-23 16:54:45 INFO      ---> c9afc99a3d8f
+torchx 2024-07-23 16:54:45 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-07-23 16:54:45 INFO     Successfully built c9afc99a3d8f
+torchx 2024-07-23 16:54:45 INFO     Built new image `sha256:c9afc99a3d8ff0884370fa40619566378dcd1fae5541b214edec5f1517632e4e` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
+torchx 2024-07-23 16:54:46 INFO     Waiting for the app to finish...
 python/0 Hello, your name!
-torchx 2024-07-22 21:06:15 INFO     Job finished: SUCCEEDED
+torchx 2024-07-23 16:54:47 INFO     Job finished: SUCCEEDED
 

TorchX defaults to using the ghcr.io/pytorch/torchx Docker container image which contains the PyTorch libraries, TorchX and related dependencies.

@@ -765,41 +765,41 @@

Distributed
-torchx 2024-07-22 21:06:17 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:06:17 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-07-22 21:06:17 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-07-22 21:06:17 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-07-22 21:06:18 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-07-22 21:06:18 INFO     Step 1/4 : ARG IMAGE
-torchx 2024-07-22 21:06:18 INFO     Step 2/4 : FROM $IMAGE
-torchx 2024-07-22 21:06:18 INFO      ---> dced593a08fb
-torchx 2024-07-22 21:06:18 INFO     Step 3/4 : COPY . .
-torchx 2024-07-22 21:06:25 INFO      ---> 73b8d39ff9e9
-torchx 2024-07-22 21:06:25 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-07-22 21:06:25 INFO      ---> Running in fe7309557934
-torchx 2024-07-22 21:06:31 INFO      ---> Removed intermediate container fe7309557934
-torchx 2024-07-22 21:06:31 INFO      ---> 2c8eaf9aaf2e
-torchx 2024-07-22 21:06:31 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
-torchx 2024-07-22 21:06:31 INFO     Successfully built 2c8eaf9aaf2e
-torchx 2024-07-22 21:06:31 INFO     Built new image `sha256:2c8eaf9aaf2e9a78ca98ea53c8efba5d579bb90040704afc6e81c4ebe23ceeea` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.
-torchx 2024-07-22 21:06:32 INFO     Waiting for the app to finish...
-dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING]
-dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] *****************************************
-dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
-dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] *****************************************
-dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING]
-dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] *****************************************
-dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
-dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] *****************************************
+torchx 2024-07-23 16:54:49 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:54:49 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-07-23 16:54:49 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-07-23 16:54:49 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-07-23 16:54:49 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-07-23 16:54:49 INFO     Step 1/4 : ARG IMAGE
+torchx 2024-07-23 16:54:49 INFO     Step 2/4 : FROM $IMAGE
+torchx 2024-07-23 16:54:49 INFO      ---> 57dff228bc2f
+torchx 2024-07-23 16:54:49 INFO     Step 3/4 : COPY . .
+torchx 2024-07-23 16:54:56 INFO      ---> e20e4c6c041b
+torchx 2024-07-23 16:54:56 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-07-23 16:54:56 INFO      ---> Running in 917e33ee3a92
+torchx 2024-07-23 16:55:03 INFO      ---> Removed intermediate container 917e33ee3a92
+torchx 2024-07-23 16:55:03 INFO      ---> 88da38ff059f
+torchx 2024-07-23 16:55:03 INFO     [Warning] One or more build-args [WORKSPACE] were not consumed
+torchx 2024-07-23 16:55:03 INFO     Successfully built 88da38ff059f
+torchx 2024-07-23 16:55:03 INFO     Built new image `sha256:88da38ff059f51ed9370709589413db39576cbab42045ff58bc0fe599472d703` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.
+torchx 2024-07-23 16:55:04 INFO     Waiting for the app to finish...
+dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING]
+dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] *****************************************
+dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] *****************************************
+dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING]
+dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] *****************************************
+dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] *****************************************
 dist_app/1 [1]:I am worker 3 of 4!
+dist_app/1 [1]:all_reduce output = tensor([6])
 dist_app/1 [0]:I am worker 2 of 4!
 dist_app/1 [0]:all_reduce output = tensor([6])
-dist_app/1 [1]:all_reduce output = tensor([6])
-dist_app/0 [0]:I am worker 0 of 4!
-dist_app/0 [0]:all_reduce output = tensor([6])
 dist_app/0 [1]:I am worker 1 of 4!
 dist_app/0 [1]:all_reduce output = tensor([6])
-torchx 2024-07-22 21:06:40 INFO     Job finished: SUCCEEDED
+dist_app/0 [0]:I am worker 0 of 4!
+dist_app/0 [0]:all_reduce output = tensor([6])
+torchx 2024-07-23 16:55:13 INFO     Job finished: SUCCEEDED
 
@@ -1189,57 +1189,57 @@

Docker-based Schedulers
-torchx 2024-07-22 21:06:42 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
-torchx 2024-07-22 21:06:43 INFO     Tracker configurations: {}
-torchx 2024-07-22 21:06:43 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
-torchx 2024-07-22 21:06:43 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
-torchx 2024-07-22 21:06:43 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
-torchx 2024-07-22 21:06:43 INFO     Building workspace docker image (this may take a while)...
-torchx 2024-07-22 21:06:43 INFO     Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
-torchx 2024-07-22 21:07:55 INFO      ---> c3f17e5ac010
-torchx 2024-07-22 21:07:55 INFO     Step 2/4 : RUN pip install timm
-torchx 2024-07-22 21:07:55 INFO      ---> Running in f2ea0a555437
-torchx 2024-07-22 21:07:55 INFO     Collecting timm
-torchx 2024-07-22 21:07:55 INFO       Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
-torchx 2024-07-22 21:07:55 INFO     Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)
-torchx 2024-07-22 21:07:55 INFO     Collecting huggingface-hub
-torchx 2024-07-22 21:07:55 INFO       Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
-torchx 2024-07-22 21:07:55 INFO     Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)
-torchx 2024-07-22 21:07:55 INFO     Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)
-torchx 2024-07-22 21:07:56 INFO     Collecting safetensors
-torchx 2024-07-22 21:07:56 INFO       Downloading safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)
-torchx 2024-07-22 21:07:56 INFO     Collecting packaging>=20.9
-torchx 2024-07-22 21:07:56 INFO       Downloading packaging-24.0-py3-none-any.whl (53 kB)
-torchx 2024-07-22 21:07:56 INFO     Collecting fsspec
-torchx 2024-07-22 21:07:56 INFO       Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)
-torchx 2024-07-22 21:07:56 INFO     Collecting importlib-metadata
-torchx 2024-07-22 21:07:56 INFO       Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)
-torchx 2024-07-22 21:07:56 INFO     Collecting zipp>=0.5
-torchx 2024-07-22 21:07:56 INFO       Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)
-torchx 2024-07-22 21:07:56 INFO     Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)
-torchx 2024-07-22 21:07:57 INFO     Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm
-torchx 2024-07-22 21:07:57 INFO     Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.3 timm-0.9.12 zipp-3.15.0
-torchx 2024-07-22 21:08:00 INFO      ---> Removed intermediate container f2ea0a555437
-torchx 2024-07-22 21:08:00 INFO      ---> 0fdc9a81a479
-torchx 2024-07-22 21:08:00 INFO     Step 3/4 : COPY . .
-torchx 2024-07-22 21:08:03 INFO      ---> 25513fc7a6fb
-torchx 2024-07-22 21:08:03 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
-torchx 2024-07-22 21:08:03 INFO      ---> Running in c6646d9ef0c4
-torchx 2024-07-22 21:08:05 INFO      ---> Removed intermediate container c6646d9ef0c4
-torchx 2024-07-22 21:08:05 INFO      ---> c67744da5bd8
-torchx 2024-07-22 21:08:05 INFO     [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed
-torchx 2024-07-22 21:08:05 INFO     Successfully built c67744da5bd8
-torchx 2024-07-22 21:08:05 INFO     Built new image `sha256:c67744da5bd8865705a30e6d7180d7902ec16780f29a6591446985e1f56ad660` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
-torchx 2024-07-22 21:08:05 INFO     Waiting for the app to finish...
+torchx 2024-07-23 16:55:15 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
+torchx 2024-07-23 16:55:16 INFO     Tracker configurations: {}
+torchx 2024-07-23 16:55:16 INFO     Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...
+torchx 2024-07-23 16:55:16 INFO     To disable workspaces pass: --workspace="" from CLI or workspace=None programmatically.
+torchx 2024-07-23 16:55:16 INFO     Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`
+torchx 2024-07-23 16:55:16 INFO     Building workspace docker image (this may take a while)...
+torchx 2024-07-23 16:55:16 INFO     Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime
+torchx 2024-07-23 16:56:40 INFO      ---> c3f17e5ac010
+torchx 2024-07-23 16:56:40 INFO     Step 2/4 : RUN pip install timm
+torchx 2024-07-23 16:56:40 INFO      ---> Running in 48579788648a
+torchx 2024-07-23 16:56:40 INFO     Collecting timm
+torchx 2024-07-23 16:56:40 INFO       Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
+torchx 2024-07-23 16:56:40 INFO     Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)
+torchx 2024-07-23 16:56:41 INFO     Collecting safetensors
+torchx 2024-07-23 16:56:41 INFO       Downloading safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)
+torchx 2024-07-23 16:56:41 INFO     Collecting huggingface-hub
+torchx 2024-07-23 16:56:41 INFO       Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)
+torchx 2024-07-23 16:56:41 INFO     Collecting packaging>=20.9
+torchx 2024-07-23 16:56:41 INFO       Downloading packaging-24.0-py3-none-any.whl (53 kB)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)
+torchx 2024-07-23 16:56:41 INFO     Collecting importlib-metadata
+torchx 2024-07-23 16:56:41 INFO       Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)
+torchx 2024-07-23 16:56:41 INFO     Collecting fsspec
+torchx 2024-07-23 16:56:41 INFO       Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)
+torchx 2024-07-23 16:56:41 INFO     Collecting zipp>=0.5
+torchx 2024-07-23 16:56:41 INFO       Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)
+torchx 2024-07-23 16:56:41 INFO     Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)
+torchx 2024-07-23 16:56:42 INFO     Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm
+torchx 2024-07-23 16:56:42 INFO     Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.3 timm-0.9.12 zipp-3.15.0
+torchx 2024-07-23 16:56:45 INFO      ---> Removed intermediate container 48579788648a
+torchx 2024-07-23 16:56:45 INFO      ---> 7794e5bfad20
+torchx 2024-07-23 16:56:45 INFO     Step 3/4 : COPY . .
+torchx 2024-07-23 16:56:47 INFO      ---> 956583ec4cab
+torchx 2024-07-23 16:56:47 INFO     Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0
+torchx 2024-07-23 16:56:47 INFO      ---> Running in 2ef42420c318
+torchx 2024-07-23 16:56:50 INFO      ---> Removed intermediate container 2ef42420c318
+torchx 2024-07-23 16:56:50 INFO      ---> 088f502c1bad
+torchx 2024-07-23 16:56:50 INFO     [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed
+torchx 2024-07-23 16:56:50 INFO     Successfully built 088f502c1bad
+torchx 2024-07-23 16:56:50 INFO     Built new image `sha256:088f502c1badc4334099d3102be573ba00528ecebb9e30275ca2beb9a42af5f9` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.
+torchx 2024-07-23 16:56:50 INFO     Waiting for the app to finish...
 python/0 ResNet(
 python/0   (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
 python/0   (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
@@ -1348,7 +1348,7 @@ 

Docker-based Schedulers

diff --git a/0.8.0dev0/quickstart.ipynb b/0.8.0dev0/quickstart.ipynb index 3d41f9319..3216bd3f5 100644 --- a/0.8.0dev0/quickstart.ipynb +++ b/0.8.0dev0/quickstart.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "05f9ded3", + "id": "e4b687c7", "metadata": {}, "source": [ "# Quickstart\n", @@ -18,7 +18,7 @@ }, { "cell_type": "markdown", - "id": "b38393c7", + "id": "4e5f3b67", "metadata": { "region_name": "md" }, @@ -31,7 +31,7 @@ }, { "cell_type": "markdown", - "id": "3a1728e7", + "id": "751dabd8", "metadata": {}, "source": [ "See the [README](https://github.com/pytorch/torchx) for more\n", @@ -41,13 +41,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "2162cf3d", + "id": "6237e94a", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:55.515514Z", - "iopub.status.busy": "2024-07-22T21:05:55.514947Z", - "iopub.status.idle": "2024-07-22T21:05:55.807262Z", - "shell.execute_reply": "2024-07-22T21:05:55.806494Z" + "iopub.execute_input": "2024-07-23T16:54:27.197214Z", + "iopub.status.busy": "2024-07-23T16:54:27.196809Z", + "iopub.status.idle": "2024-07-23T16:54:27.497374Z", + "shell.execute_reply": "2024-07-23T16:54:27.496556Z" } }, "outputs": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "f3dd5224", + "id": "10017000", "metadata": {}, "source": [ "## Hello World\n", @@ -189,13 +189,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "4daa7493", + "id": "e10bd4f0", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:55.810017Z", - "iopub.status.busy": "2024-07-22T21:05:55.809701Z", - "iopub.status.idle": "2024-07-22T21:05:55.814224Z", - "shell.execute_reply": "2024-07-22T21:05:55.813616Z" + "iopub.execute_input": "2024-07-23T16:54:27.500267Z", + "iopub.status.busy": "2024-07-23T16:54:27.499864Z", + "iopub.status.idle": "2024-07-23T16:54:27.504430Z", + "shell.execute_reply": "2024-07-23T16:54:27.503833Z" } }, "outputs": [ @@ -217,7 +217,7 @@ }, { "cell_type": "markdown", - "id": "5ca44dea", + "id": "3cebb9d1", "metadata": {}, "source": [ "## Launching\n", @@ -231,13 +231,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "7fd632e9", + "id": "0eb549b9", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:55.816790Z", - "iopub.status.busy": "2024-07-22T21:05:55.816391Z", - "iopub.status.idle": "2024-07-22T21:05:56.867667Z", - "shell.execute_reply": "2024-07-22T21:05:56.866841Z" + "iopub.execute_input": "2024-07-23T16:54:27.506970Z", + "iopub.status.busy": "2024-07-23T16:54:27.506553Z", + "iopub.status.idle": "2024-07-23T16:54:28.540962Z", + "shell.execute_reply": "2024-07-23T16:54:28.540215Z" } }, "outputs": [ @@ -494,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "5549c4c2", + "id": "df86f869", "metadata": {}, "source": [ "The component takes in the script name and any extra arguments will be passed to\n", @@ -504,13 +504,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "a3d96988", + "id": "c13410ce", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:56.872861Z", - "iopub.status.busy": "2024-07-22T21:05:56.872467Z", - "iopub.status.idle": "2024-07-22T21:05:58.886918Z", - "shell.execute_reply": "2024-07-22T21:05:58.886265Z" + "iopub.execute_input": "2024-07-23T16:54:28.547021Z", + "iopub.status.busy": "2024-07-23T16:54:28.546407Z", + "iopub.status.idle": "2024-07-23T16:54:30.558551Z", + "shell.execute_reply": "2024-07-23T16:54:30.557918Z" } }, "outputs": [ @@ -518,28 +518,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:57 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:54:29 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:57 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-07-23 16:54:29 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:57 INFO Log directory is: /tmp/torchx_57w4cxsf\n" + "torchx 2024-07-23 16:54:29 INFO Log directory is: /tmp/torchx_jnu_js7p\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:57 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:54:29 INFO Waiting for the app to finish...\n" ] }, { @@ -553,14 +553,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:58 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:54:30 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/torchx_utils_python-swqb6wpg7lk0d\n" + "local_cwd://torchx/torchx_utils_python-ccd2w13k05w6kd\n" ] } ], @@ -571,7 +571,7 @@ }, { "cell_type": "markdown", - "id": "c513465d", + "id": "a431966c", "metadata": {}, "source": [ "We can run the exact same app via the `local_docker` scheduler. This scheduler\n", @@ -590,13 +590,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "17b2862b", + "id": "2b1abcea", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:05:58.889999Z", - "iopub.status.busy": "2024-07-22T21:05:58.889516Z", - "iopub.status.idle": "2024-07-22T21:06:15.501441Z", - "shell.execute_reply": "2024-07-22T21:06:15.500782Z" + "iopub.execute_input": "2024-07-23T16:54:30.561372Z", + "iopub.status.busy": "2024-07-23T16:54:30.560959Z", + "iopub.status.idle": "2024-07-23T16:54:47.154520Z", + "shell.execute_reply": "2024-07-23T16:54:47.153863Z" } }, "outputs": [ @@ -604,126 +604,126 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:59 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:54:31 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:59 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-07-23 16:54:31 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:05:59 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-07-23 16:54:31 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-07-23 16:54:31 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-07-23 16:54:31 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-07-23 16:54:31 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-07-23 16:54:31 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO ---> dced593a08fb\n" + "torchx 2024-07-23 16:54:31 INFO ---> 57dff228bc2f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:00 INFO Step 3/4 : COPY . .\n" + "torchx 2024-07-23 16:54:31 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:07 INFO ---> b00c44e391a6\n" + "torchx 2024-07-23 16:54:38 INFO ---> 057c5a4a4d4c\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:07 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-07-23 16:54:38 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:07 INFO ---> Running in 85fe6ef573d4\n" + "torchx 2024-07-23 16:54:38 INFO ---> Running in 83919021be0b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO ---> Removed intermediate container 85fe6ef573d4\n" + "torchx 2024-07-23 16:54:45 INFO ---> Removed intermediate container 83919021be0b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO ---> c8c3e67341ab\n" + "torchx 2024-07-23 16:54:45 INFO ---> c9afc99a3d8f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-07-23 16:54:45 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO Successfully built c8c3e67341ab\n" + "torchx 2024-07-23 16:54:45 INFO Successfully built c9afc99a3d8f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO Built new image `sha256:c8c3e67341aba572d62b3d355b753f405de428ef3ae19c699b34ebad7b0aaefd` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-07-23 16:54:45 INFO Built new image `sha256:c9afc99a3d8ff0884370fa40619566378dcd1fae5541b214edec5f1517632e4e` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:14 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:54:46 INFO Waiting for the app to finish...\n" ] }, { @@ -737,14 +737,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:15 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:54:47 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-hwtj7tf0nz1rc\n" + "local_docker://torchx/torchx_utils_python-k71dwb41j4qk3\n" ] } ], @@ -755,7 +755,7 @@ }, { "cell_type": "markdown", - "id": "c605d786", + "id": "2eef0b49", "metadata": {}, "source": [ "TorchX defaults to using the\n", @@ -773,13 +773,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "aa6f071c", + "id": "dddf5c19", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:15.504232Z", - "iopub.status.busy": "2024-07-22T21:06:15.503947Z", - "iopub.status.idle": "2024-07-22T21:06:16.623509Z", - "shell.execute_reply": "2024-07-22T21:06:16.622763Z" + "iopub.execute_input": "2024-07-23T16:54:47.157489Z", + "iopub.status.busy": "2024-07-23T16:54:47.157071Z", + "iopub.status.idle": "2024-07-23T16:54:48.298817Z", + "shell.execute_reply": "2024-07-23T16:54:48.298059Z" } }, "outputs": [ @@ -1190,7 +1190,7 @@ }, { "cell_type": "markdown", - "id": "7108781f", + "id": "b0446d0c", "metadata": {}, "source": [ "Lets create a slightly more interesting app to leverage the TorchX distributed\n", @@ -1200,13 +1200,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "e5c5f8bb", + "id": "28a479f8", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:16.629621Z", - "iopub.status.busy": "2024-07-22T21:06:16.629205Z", - "iopub.status.idle": "2024-07-22T21:06:16.633603Z", - "shell.execute_reply": "2024-07-22T21:06:16.632948Z" + "iopub.execute_input": "2024-07-23T16:54:48.305905Z", + "iopub.status.busy": "2024-07-23T16:54:48.305618Z", + "iopub.status.idle": "2024-07-23T16:54:48.309904Z", + "shell.execute_reply": "2024-07-23T16:54:48.309264Z" } }, "outputs": [ @@ -1234,7 +1234,7 @@ }, { "cell_type": "markdown", - "id": "7b99bbe7", + "id": "d64d709e", "metadata": {}, "source": [ "Let launch a small job with 2 nodes and 2 worker processes per node:" @@ -1243,13 +1243,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "7da1592b", + "id": "e3a22f40", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:16.636239Z", - "iopub.status.busy": "2024-07-22T21:06:16.635752Z", - "iopub.status.idle": "2024-07-22T21:06:40.494933Z", - "shell.execute_reply": "2024-07-22T21:06:40.494148Z" + "iopub.execute_input": "2024-07-23T16:54:48.312330Z", + "iopub.status.busy": "2024-07-23T16:54:48.311953Z", + "iopub.status.idle": "2024-07-23T16:55:13.240701Z", + "shell.execute_reply": "2024-07-23T16:55:13.240033Z" } }, "outputs": [ @@ -1257,182 +1257,182 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:17 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:54:49 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:17 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-07-23 16:54:49 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:17 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-07-23 16:54:49 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:17 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-07-23 16:54:49 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:18 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-07-23 16:54:49 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:18 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-07-23 16:54:49 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:18 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-07-23 16:54:49 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:18 INFO ---> dced593a08fb\n" + "torchx 2024-07-23 16:54:49 INFO ---> 57dff228bc2f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:18 INFO Step 3/4 : COPY . .\n" + "torchx 2024-07-23 16:54:49 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:25 INFO ---> 73b8d39ff9e9\n" + "torchx 2024-07-23 16:54:56 INFO ---> e20e4c6c041b\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:25 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-07-23 16:54:56 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:25 INFO ---> Running in fe7309557934\n" + "torchx 2024-07-23 16:54:56 INFO ---> Running in 917e33ee3a92\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:31 INFO ---> Removed intermediate container fe7309557934\n" + "torchx 2024-07-23 16:55:03 INFO ---> Removed intermediate container 917e33ee3a92\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:31 INFO ---> 2c8eaf9aaf2e\n" + "torchx 2024-07-23 16:55:03 INFO ---> 88da38ff059f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:31 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-07-23 16:55:03 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:31 INFO Successfully built 2c8eaf9aaf2e\n" + "torchx 2024-07-23 16:55:03 INFO Successfully built 88da38ff059f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:31 INFO Built new image `sha256:2c8eaf9aaf2e9a78ca98ea53c8efba5d579bb90040704afc6e81c4ebe23ceeea` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" + "torchx 2024-07-23 16:55:03 INFO Built new image `sha256:88da38ff059f51ed9370709589413db39576cbab42045ff58bc0fe599472d703` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:32 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:55:04 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] \n" + "dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-07-22 21:06:33,446] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-07-23 16:55:05,144] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] \n" + "dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-07-22 21:06:33,485] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-07-23 16:55:05,188] torch.distributed.run: [WARNING] *****************************************\n" ] }, { @@ -1446,63 +1446,63 @@ "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [0]:I am worker 2 of 4!\n" + "dist_app/1 [1]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [0]:all_reduce output = tensor([6])\n" + "dist_app/1 [0]:I am worker 2 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [1]:all_reduce output = tensor([6])\n" + "dist_app/1 [0]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:I am worker 0 of 4!\n" + "dist_app/0 [1]:I am worker 1 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:all_reduce output = tensor([6])\n" + "dist_app/0 [1]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:I am worker 1 of 4!\n" + "dist_app/0 [0]:I am worker 0 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:all_reduce output = tensor([6])\n" + "dist_app/0 [0]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:40 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:55:13 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/dist_app-r3g9fcw4bz5h2c\n" + "local_docker://torchx/dist_app-rkpk9xmkfv9gcc\n" ] } ], @@ -1513,7 +1513,7 @@ }, { "cell_type": "markdown", - "id": "1c2573b3", + "id": "bf841978", "metadata": {}, "source": [ "## Workspaces / Patching\n", @@ -1542,13 +1542,13 @@ { "cell_type": "code", "execution_count": 9, - "id": "ee7a0ede", + "id": "08658bf9", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:40.498435Z", - "iopub.status.busy": "2024-07-22T21:06:40.497777Z", - "iopub.status.idle": "2024-07-22T21:06:40.502315Z", - "shell.execute_reply": "2024-07-22T21:06:40.501711Z" + "iopub.execute_input": "2024-07-23T16:55:13.243614Z", + "iopub.status.busy": "2024-07-23T16:55:13.243346Z", + "iopub.status.idle": "2024-07-23T16:55:13.247797Z", + "shell.execute_reply": "2024-07-23T16:55:13.247111Z" } }, "outputs": [ @@ -1573,7 +1573,7 @@ }, { "cell_type": "markdown", - "id": "eb74bcd8", + "id": "b5e9a517", "metadata": {}, "source": [ "## Remote Schedulers\n", @@ -1588,7 +1588,7 @@ }, { "cell_type": "markdown", - "id": "e8bd1df4", + "id": "f687d316", "metadata": { "region_name": "md" }, @@ -1603,7 +1603,7 @@ }, { "cell_type": "markdown", - "id": "ed2b8aae", + "id": "27ff3629", "metadata": {}, "source": [ "Depending on the scheduler there may be a few extra configuration parameters so\n", @@ -1613,7 +1613,7 @@ }, { "cell_type": "markdown", - "id": "61ede02a", + "id": "c804cafa", "metadata": {}, "source": [ "All config options:" @@ -1622,13 +1622,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "3f50ec76", + "id": "0914d5e6", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:40.505027Z", - "iopub.status.busy": "2024-07-22T21:06:40.504652Z", - "iopub.status.idle": "2024-07-22T21:06:42.324966Z", - "shell.execute_reply": "2024-07-22T21:06:42.324297Z" + "iopub.execute_input": "2024-07-23T16:55:13.250523Z", + "iopub.status.busy": "2024-07-23T16:55:13.250039Z", + "iopub.status.idle": "2024-07-23T16:55:15.092678Z", + "shell.execute_reply": "2024-07-23T16:55:15.091995Z" }, "lines_to_next_cell": 2 }, @@ -3412,7 +3412,7 @@ }, { "cell_type": "markdown", - "id": "937ae017", + "id": "b9885b5f", "metadata": {}, "source": [ "## Custom Images\n", @@ -3427,13 +3427,13 @@ { "cell_type": "code", "execution_count": 11, - "id": "3d8ea77f", + "id": "e58d9d81", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:42.328228Z", - "iopub.status.busy": "2024-07-22T21:06:42.327697Z", - "iopub.status.idle": "2024-07-22T21:06:42.332212Z", - "shell.execute_reply": "2024-07-22T21:06:42.331638Z" + "iopub.execute_input": "2024-07-23T16:55:15.095919Z", + "iopub.status.busy": "2024-07-23T16:55:15.095548Z", + "iopub.status.idle": "2024-07-23T16:55:15.100259Z", + "shell.execute_reply": "2024-07-23T16:55:15.099694Z" } }, "outputs": [ @@ -3456,13 +3456,13 @@ { "cell_type": "code", "execution_count": 12, - "id": "e4a93872", + "id": "9cdb28fa", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:42.334729Z", - "iopub.status.busy": "2024-07-22T21:06:42.334263Z", - "iopub.status.idle": "2024-07-22T21:06:42.338015Z", - "shell.execute_reply": "2024-07-22T21:06:42.337361Z" + "iopub.execute_input": "2024-07-23T16:55:15.102796Z", + "iopub.status.busy": "2024-07-23T16:55:15.102273Z", + "iopub.status.idle": "2024-07-23T16:55:15.106106Z", + "shell.execute_reply": "2024-07-23T16:55:15.105451Z" } }, "outputs": [ @@ -3486,7 +3486,7 @@ }, { "cell_type": "markdown", - "id": "90387b31", + "id": "90e7bbf6", "metadata": {}, "source": [ "Once we have the Dockerfile created we can launch as normal and TorchX will\n", @@ -3497,13 +3497,13 @@ { "cell_type": "code", "execution_count": 13, - "id": "5b8bb549", + "id": "00524dd9", "metadata": { "execution": { - "iopub.execute_input": "2024-07-22T21:06:42.340713Z", - "iopub.status.busy": "2024-07-22T21:06:42.340225Z", - "iopub.status.idle": "2024-07-22T21:08:07.499207Z", - "shell.execute_reply": "2024-07-22T21:08:07.498414Z" + "iopub.execute_input": "2024-07-23T16:55:15.108467Z", + "iopub.status.busy": "2024-07-23T16:55:15.108123Z", + "iopub.status.idle": "2024-07-23T16:56:52.467345Z", + "shell.execute_reply": "2024-07-23T16:56:52.466450Z" } }, "outputs": [ @@ -3511,357 +3511,357 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:42 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" + "torchx 2024-07-23 16:55:15 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO Tracker configurations: {}\n" + "torchx 2024-07-23 16:55:16 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-07-23 16:55:16 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-07-23 16:55:16 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-07-23 16:55:16 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-07-23 16:55:16 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:06:43 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" + "torchx 2024-07-23 16:55:16 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO ---> c3f17e5ac010\n" + "torchx 2024-07-23 16:56:40 INFO ---> c3f17e5ac010\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Step 2/4 : RUN pip install timm\n" + "torchx 2024-07-23 16:56:40 INFO Step 2/4 : RUN pip install timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO ---> Running in f2ea0a555437\n" + "torchx 2024-07-23 16:56:40 INFO ---> Running in 48579788648a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Collecting timm\n" + "torchx 2024-07-23 16:56:40 INFO Collecting timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" + "torchx 2024-07-23 16:56:40 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" + "torchx 2024-07-23 16:56:40 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Collecting huggingface-hub\n" + "torchx 2024-07-23 16:56:41 INFO Collecting safetensors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" + "torchx 2024-07-23 16:56:41 INFO Downloading safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:55 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Collecting safetensors\n" + "torchx 2024-07-23 16:56:41 INFO Collecting huggingface-hub\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Downloading safetensors-0.4.3-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)\n" + "torchx 2024-07-23 16:56:41 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Collecting packaging>=20.9\n" + "torchx 2024-07-23 16:56:41 INFO Collecting packaging>=20.9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" + "torchx 2024-07-23 16:56:41 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Collecting fsspec\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" + "torchx 2024-07-23 16:56:41 INFO Collecting importlib-metadata\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" + "torchx 2024-07-23 16:56:41 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" + "torchx 2024-07-23 16:56:41 INFO Collecting fsspec\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Collecting importlib-metadata\n" + "torchx 2024-07-23 16:56:41 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Collecting zipp>=0.5\n" + "torchx 2024-07-23 16:56:41 INFO Collecting zipp>=0.5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" + "torchx 2024-07-23 16:56:41 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:56 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" + "torchx 2024-07-23 16:56:41 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:57 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" + "torchx 2024-07-23 16:56:42 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:07:57 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.3 timm-0.9.12 zipp-3.15.0\n" + "torchx 2024-07-23 16:56:42 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.3 timm-0.9.12 zipp-3.15.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:00 INFO ---> Removed intermediate container f2ea0a555437\n" + "torchx 2024-07-23 16:56:45 INFO ---> Removed intermediate container 48579788648a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:00 INFO ---> 0fdc9a81a479\n" + "torchx 2024-07-23 16:56:45 INFO ---> 7794e5bfad20\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:00 INFO Step 3/4 : COPY . .\n" + "torchx 2024-07-23 16:56:45 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:03 INFO ---> 25513fc7a6fb\n" + "torchx 2024-07-23 16:56:47 INFO ---> 956583ec4cab\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:03 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" + "torchx 2024-07-23 16:56:47 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.8.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:03 INFO ---> Running in c6646d9ef0c4\n" + "torchx 2024-07-23 16:56:47 INFO ---> Running in 2ef42420c318\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO ---> Removed intermediate container c6646d9ef0c4\n" + "torchx 2024-07-23 16:56:50 INFO ---> Removed intermediate container 2ef42420c318\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO ---> c67744da5bd8\n" + "torchx 2024-07-23 16:56:50 INFO ---> 088f502c1bad\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" + "torchx 2024-07-23 16:56:50 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO Successfully built c67744da5bd8\n" + "torchx 2024-07-23 16:56:50 INFO Successfully built 088f502c1bad\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO Built new image `sha256:c67744da5bd8865705a30e6d7180d7902ec16780f29a6591446985e1f56ad660` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-07-23 16:56:50 INFO Built new image `sha256:088f502c1badc4334099d3102be573ba00528ecebb9e30275ca2beb9a42af5f9` based on original image `ghcr.io/pytorch/torchx:0.8.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:05 INFO Waiting for the app to finish...\n" + "torchx 2024-07-23 16:56:50 INFO Waiting for the app to finish...\n" ] }, { @@ -4624,14 +4624,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-07-22 21:08:07 INFO Job finished: SUCCEEDED\n" + "torchx 2024-07-23 16:56:52 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-jrz1vzq147jk3c\n" + "local_docker://torchx/torchx_utils_python-b92d55vkth042\n" ] } ], @@ -4642,7 +4642,7 @@ }, { "cell_type": "markdown", - "id": "1923b023", + "id": "054becbb", "metadata": {}, "source": [ "### Slurm\n", @@ -4653,7 +4653,7 @@ }, { "cell_type": "markdown", - "id": "64eb50c5", + "id": "acdf1953", "metadata": {}, "source": [ "## Next Steps\n", diff --git a/0.8.0dev0/searchindex.js b/0.8.0dev0/searchindex.js index 5393301d5..d9b7060b6 100644 --- a/0.8.0dev0/searchindex.js +++ b/0.8.0dev0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 7, 9, 11, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 12, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 12, 20, 29], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31, 45], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28, 45], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": [3, 29], "15": [3, 12, 29], "08": [3, 29], "57": [3, 29], "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 12, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30, 45], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 28, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "7": [11, 12, 29], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "07": [12, 27, 29], "22": [12, 29, 39, 40], "21": [12, 29, 39], "00": [12, 27, 29], "43": [12, 29], "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_8tuc8ai0": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "44": 12, "sw1wqcq96tsdg": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": 12, "99b": 12, "0s": 12, "7s": 12, "dockerignor": [12, 47], "2b": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "25kb": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "0b": 12, "857b": 12, "1s": 12, "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "189b": 12, "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "26": [12, 29], "70mb": 12, "2s": 12, "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "94mb": 12, "10mb": 12, "3s": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "00gb": 12, "73mb": 12, "5s": 12, "39mb": 12, "23": 12, "07mb": 12, "6s": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "extract": [12, 16, 29, 36], "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "46mb": 12, "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "8s": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "101": 12, "71mb": 12, "88": 12, "15mb": 12, "19mb": 12, "208": 12, "67mb": 12, "185": 12, "60mb": 12, "6": [12, 29, 39], "29mb": 12, "314": 12, "57mb": 12, "275": 12, "78mb": 12, "49mb": 12, "421": 12, "53mb": 12, "367": 12, "00mb": 12, "12": [12, 29, 41], "58mb": 12, "4s": 12, "457": 12, "18mb": 12, "14": [12, 18, 29], "68mb": 12, "528": 12, "48mb": 12, "547": 12, "36mb": 12, "18": [12, 29, 39, 40], "87mb": 12, "634": 12, "20": [12, 29], "97mb": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "636": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "9s": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "741": 12, "34mb": 12, "29": 12, "727": 12, "50": [12, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "33mb": 12, "71": 12, "30mb": 12, "92": 12, "27mb": 12, "113": 12, "25mb": 12, "13": [12, 16, 27, 29, 40], "847": 12, "817": 12, "89mb": 12, "134": 12, "156": 12, "24mb": 12, "177": 12, "21mb": 12, "205": 12, "52mb": 12, "907": 12, "02mb": 12, "226": 12, "954": 12, "20mb": 12, "247": 12, "297": 12, "80mb": 12, "317": 12, "72mb": 12, "998": 12, "05gb": 12, "339": 12, "74mb": 12, "08gb": 12, "17": [12, 29], "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "16gb": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "18gb": 12, "26gb": 12, "19": 12, "27gb": 12, "37gb": 12, "36gb": 12, "45gb": 12, "47gb": 12, "55gb": 12, "24": [12, 17, 29, 36, 40, 45], "58gb": 12, "64gb": 12, "25": [12, 29], "68gb": 12, "78gb": 12, "27": [12, 29], "28": 12, "89gb": 12, "30": [12, 29, 36, 43], "6894810b99959b068fed4046cd9c709a785a92d740fbbc85f8b5e485bffc9500": 12, "02": 12, "disabl": [12, 29, 31, 36, 44], "35": 12, "36": 12, "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "45": 12, "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "6894810b9995": 12, "40": [12, 29], "056e890be821": 12, "label": [12, 17, 29, 36], "141e2a260a4b": 12, "remov": [12, 27, 29, 30, 39], "523f4054b4be": 12, "successfulli": [12, 29, 30, 45], "523f4054b4be56bc1c1baea10daa9d9516480b017a3160a430061489a81f47a5": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "46": 12, "fzgbvxbtf7xqsd": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "48": 12, "05": [12, 27, 29], "dced593a08fb": [12, 29], "41": 12, "2e0bbafaa42b": 12, "e9ca0762c4a5": 12, "05956815667f": 12, "05956815667f37fa6a1a6c84b0766e5df954714086c917274691e8e9d04a2820": 12, "49": 12, "wffl2x7dd7vsdc": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": [17, 45], "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": 18, "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_4745": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "22t21": 27, "52": 27, "449": 27, "0000": 27, "taskinst": 27, "2076": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "lczkcgvjlhm4gc": 27, "manual__2021": 27, "09": 27, "13t00": 27, "455": 27, "2306": 27, "456": 27, "2388": 27, "queued_dur": 27, "465": 27, "2330": 27, "_pythondecoratedoper": 27, "719": 27, "2648": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_try_numb": 27, "airflow_ctx_dag_run_id": 27, "722": 27, "430": 27, "endgroup": 27, "53": [27, 29], "410": 27, "72": 27, "414": 27, "local_schedul": [27, 34, 41], "771": 27, "415": 27, "777": 27, "torchx_ljdlx37v": 27, "523": 27, "237": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "524": 27, "441": 27, "post": 27, "529": 27, "1206": 27, "mark": 27, "run_id": [27, 46], "20210913t000000": 27, "20240722t210552": 27, "end_dat": 27, "20240722t210553": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "legaci": 28, "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "torchx_57w4cxsf": 29, "58": 29, "swqb6wpg7lk0d": 29, "59": 29, "b00c44e391a6": 29, "85fe6ef573d4": 29, "c8c3e67341ab": 29, "c8c3e67341aba572d62b3d355b753f405de428ef3ae19c699b34ebad7b0aaefd": 29, "hwtj7tf0nz1rc": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "73b8d39ff9e9": 29, "fe7309557934": 29, "31": 29, "2c8eaf9aaf2": 29, "2c8eaf9aaf2e9a78ca98ea53c8efba5d579bb90040704afc6e81c4ebe23ceeea": 29, "33": 29, "446": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "485": 29, "r3g9fcw4bz5h2c": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "privileg": [29, 35, 37, 39, 40], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "elev": [29, 35, 37], "permiss": [29, 35, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "42": 29, "55": 29, "c3f17e5ac010": 29, "f2ea0a555437": 29, "py3": 29, "whl": 29, "satisfi": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "268": 29, "kb": 29, "pyyaml": 29, "56": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "typing_extens": 29, "2023": 29, "143": 29, "filelock": 29, "61": 29, "importlib": 29, "importlib_metadata": 29, "zipp": 29, "urllib3": 29, "certifi": 29, "2017": 29, "idna": 29, "chardet": 29, "pillow": 29, "0fdc9a81a479": 29, "03": 29, "25513fc7a6fb": 29, "c6646d9ef0c4": 29, "c67744da5bd8": 29, "c67744da5bd8865705a30e6d7180d7902ec16780f29a6591446985e1f56ad660": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "jrz1vzq147jk3c": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 45, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40, 45], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": [43, 45], "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "hot_spar": 45, "quorum": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "violat": 45, "spare": 45, "realli": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_json_repr": 45, "json_repr": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_json_repr"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 7, 9, 11, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 12, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 20], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31, 45], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28, 45], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": [3, 12], "15": [3, 12, 29], "08": 3, "57": 3, "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 12, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30, 45], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 28, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "7": [11, 12, 29], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "07": [12, 27, 29], "23": [12, 27, 29], "49": [12, 29], "44": 12, "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_kc6luplh": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "45": [12, 29], "vmjbbm6c3jv2kc": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": 12, "36b": 12, "0s": 12, "99b": 12, "1s": 12, "4s": 12, "dockerignor": [12, 47], "2b": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "19mb": 12, "26": [12, 29], "70mb": 12, "25kb": 12, "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "857b": 12, "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "0b": 12, "94mb": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "00gb": 12, "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "189b": 12, "20": [12, 29], "97mb": 12, "3s": 12, "2s": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "05mb": 12, "21": [12, 29, 39], "46mb": 12, "extract": [12, 16, 29, 36], "5s": 12, "12": [12, 29, 41], "58mb": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "6s": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "29mb": 12, "7s": 12, "109": 12, "20mb": 12, "9s": 12, "25": [12, 27, 29], "17mb": 12, "50": [12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "33mb": 12, "211": 12, "81mb": 12, "66mb": 12, "76": 12, "73mb": 12, "96": 12, "47mb": 12, "115": 12, "34mb": 12, "146": 12, "80mb": 12, "337": 12, "64mb": 12, "213": 12, "91mb": 12, "192": 12, "249": 12, "56mb": 12, "312": 12, "48mb": 12, "314": 12, "57mb": 12, "333": 12, "45mb": 12, "8s": 12, "469": 12, "76mb": 12, "422": 12, "583": 12, "01mb": 12, "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "511": 12, "71mb": 12, "688": 12, "27mb": 12, "830": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "603": 12, "98mb": 12, "940": 12, "699": 12, "26mb": 12, "6": [12, 29, 39], "04gb": 12, "787": 12, "14gb": 12, "889": 12, "25gb": 12, "981": 12, "35gb": 12, "09gb": 12, "46gb": 12, "20gb": 12, "57gb": 12, "28gb": 12, "38gb": 12, "67gb": 12, "78gb": 12, "48gb": 12, "89gb": 12, "58gb": 12, "68gb": 12, "13": [12, 16, 27, 29, 40], "30": [12, 29, 36, 43], "31": [12, 29], "17": [12, 29], "83": 12, "59cbabfc4bc4273f19893e6f5be46e117b968e6aa345e1004ca8a4ae7bd1650c": 12, "51": 12, "29": [12, 29], "disabl": [12, 29, 31, 36, 44], "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "59cbabfc4bc4": 12, "f0106ecc69d3": 12, "label": [12, 17, 29, 36], "d485ca6a242a": 12, "38": [12, 29], "remov": [12, 27, 29, 30, 39], "1cfc2f00125a": 12, "successfulli": [12, 29, 30, 45], "1cfc2f00125a37863f8127231761aea50b5299c8ce40597b9bdff1607319d5f4": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "41": [12, 29], "42": [12, 29], "nmczf0v3x19t0": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "54": [12, 27, 29], "57dff228bc2f": [12, 29], "6c9063465dd1": 12, "fd424fee4fb1": 12, "94146e13e9a6": 12, "94146e13e9a69197f4f54316c8bed7d2cdf6ffcc6607a935eb7a316c1c5629b5": 12, "qv026x0nr27f0c": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "24": [17, 27, 29, 36, 40, 45], "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": [17, 45], "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": 18, "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "14": 18, "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_4683": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "23t16": 27, "950": 27, "0000": 27, "taskinst": 27, "2076": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "vxmc43hj9dj2g": 27, "manual__2021": 27, "09": 27, "13t00": 27, "00": 27, "955": 27, "956": 27, "2306": 27, "957": 27, "2388": 27, "queued_dur": 27, "966": 27, "2330": 27, "_pythondecoratedoper": 27, "227": 27, "2648": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_try_numb": 27, "airflow_ctx_dag_run_id": 27, "230": 27, "430": 27, "endgroup": 27, "924": 27, "72": 27, "927": 27, "local_schedul": [27, 34, 41], "771": 27, "928": 27, "777": 27, "torchx_wh8khrqb": 27, "036": 27, "237": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "037": 27, "441": 27, "post": 27, "042": 27, "1206": 27, "mark": 27, "run_id": [27, 46], "20210913t000000": 27, "20240723t165423": 27, "end_dat": 27, "20240723t165425": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "legaci": 28, "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "torchx_jnu_js7p": 29, "ccd2w13k05w6kd": 29, "057c5a4a4d4c": 29, "83919021be0b": 29, "c9afc99a3d8f": 29, "c9afc99a3d8ff0884370fa40619566378dcd1fae5541b214edec5f1517632e4": 29, "46": 29, "47": 29, "k71dwb41j4qk3": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "56": 29, "e20e4c6c041b": 29, "917e33ee3a92": 29, "55": 29, "03": 29, "88da38ff059f": 29, "88da38ff059f51ed9370709589413db39576cbab42045ff58bc0fe599472d703": 29, "04": 29, "05": 29, "144": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "188": 29, "rkpk9xmkfv9gcc": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "privileg": [29, 35, 37, 39, 40], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "elev": [29, 35, 37], "permiss": [29, 35, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "40": 29, "c3f17e5ac010": 29, "48579788648a": 29, "py3": 29, "whl": 29, "satisfi": 29, "pyyaml": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "268": 29, "kb": 29, "typing_extens": 29, "53": 29, "61": 29, "importlib": 29, "importlib_metadata": 29, "22": [29, 39, 40], "2023": 29, "143": 29, "filelock": 29, "zipp": 29, "urllib3": 29, "27": 29, "chardet": 29, "idna": 29, "certifi": 29, "2017": 29, "pillow": 29, "7794e5bfad20": 29, "956583ec4cab": 29, "2ef42420c318": 29, "088f502c1bad": 29, "088f502c1badc4334099d3102be573ba00528ecebb9e30275ca2beb9a42af5f9": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "52": 29, "b92d55vkth042": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 45, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "18": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40, 45], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": [43, 45], "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "hot_spar": 45, "quorum": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "violat": 45, "spare": 45, "realli": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_json_repr": 45, "json_repr": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_json_repr"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file