diff --git a/0.7.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip b/0.7.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip index b14e7c4b3..a4a6208e5 100644 Binary files a/0.7.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip and b/0.7.0dev0/_downloads/00d67ca99480eb8a810baf2dcc8feeff/examples_apps_python.zip differ diff --git a/0.7.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip b/0.7.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip index dd653a5fc..8f8c09169 100644 Binary files a/0.7.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip and b/0.7.0dev0/_downloads/0565b2ee0f8662d1acf800c01d94e038/examples_apps_jupyter.zip differ diff --git a/0.7.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip b/0.7.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip index f58f34b05..911460280 100644 Binary files a/0.7.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip and b/0.7.0dev0/_downloads/3d78ac59c117d4c58eca135c8ee3f34a/examples_pipelines_python.zip differ diff --git a/0.7.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip b/0.7.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip index abd41587f..d9c0942c5 100644 Binary files a/0.7.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip and b/0.7.0dev0/_downloads/7e7e4caf540a83f1776a6e62b5c440ff/examples_pipelines_jupyter.zip differ diff --git a/0.7.0dev0/custom_components.html b/0.7.0dev0/custom_components.html index b49ce3a05..76e29a717 100644 --- a/0.7.0dev0/custom_components.html +++ b/0.7.0dev0/custom_components.html @@ -500,7 +500,7 @@

Hello World
-torchx 2024-04-12 18:51:09 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:53:26 INFO     Tracker configurations: {}
 
@@ -508,7 +508,7 @@

Hello World
-torchx 2024-04-12 18:51:09 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
+torchx 2024-04-15 15:53:26 INFO     Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option
 

@@ -516,7 +516,7 @@

Hello World
-torchx 2024-04-12 18:51:09 INFO     Log directory is: /tmp/torchx_3g6fjg0a
+torchx 2024-04-15 15:53:26 INFO     Log directory is: /tmp/torchx_mh72sa54
 

@@ -524,7 +524,7 @@

Hello World
-torchx 2024-04-12 18:51:09 INFO     Waiting for the app to finish...
+torchx 2024-04-15 15:53:26 INFO     Waiting for the app to finish...
 

@@ -540,7 +540,7 @@

Hello World
-torchx 2024-04-12 18:51:10 INFO     Job finished: SUCCEEDED
+torchx 2024-04-15 15:53:27 INFO     Job finished: SUCCEEDED
 

@@ -548,7 +548,7 @@

Hello World
-local_cwd://torchx/hello_world-wclp7r2vht0fcd
+local_cwd://torchx/hello_world-tx0hp3qlcb737
 

If we want to run in other environments, we can build a Docker container so we can run our component in Docker enabled environments such as Kubernetes or via the local Docker scheduler.

@@ -677,7 +677,7 @@

Hello World
-#3 DONE 0.3s
+#3 DONE 0.4s
 
+ +
+
+
+
+
+#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.6s done
 
+ +
+
+
+
+
+#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 131.07MB / 341.29MB 1.9s
 

We can then launch it on the local scheduler.

@@ -1727,7 +1743,7 @@

Hello World
-torchx 2024-04-12 18:52:40 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:55:06 INFO     Tracker configurations: {}
 

If you have a Kubernetes cluster you can use the Kubernetes scheduler to launch this on the cluster instead.

@@ -2027,7 +2043,7 @@

Builtins
-torchx 2024-04-12 18:52:51 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:55:16 INFO     Tracker configurations: {}
 
diff --git a/0.7.0dev0/custom_components.ipynb b/0.7.0dev0/custom_components.ipynb index 3b213b915..544ede4af 100644 --- a/0.7.0dev0/custom_components.ipynb +++ b/0.7.0dev0/custom_components.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "abb03724", + "id": "7b3d42a7", "metadata": {}, "source": [ "# Custom Components\n", @@ -27,13 +27,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "b6f7c3b2", + "id": "3a3c3291", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:51:08.695145Z", - "iopub.status.busy": "2024-04-12T18:51:08.694946Z", - "iopub.status.idle": "2024-04-12T18:51:08.702580Z", - "shell.execute_reply": "2024-04-12T18:51:08.701950Z" + "iopub.execute_input": "2024-04-15T15:53:25.771253Z", + "iopub.status.busy": "2024-04-15T15:53:25.770718Z", + "iopub.status.idle": "2024-04-15T15:53:25.778576Z", + "shell.execute_reply": "2024-04-15T15:53:25.778042Z" } }, "outputs": [ @@ -71,7 +71,7 @@ }, { "cell_type": "markdown", - "id": "742f63bf", + "id": "c4052846", "metadata": {}, "source": [ "Now that we have an app we can write the component file for it. This\n", @@ -84,13 +84,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "bb9f09bd", + "id": "e3461715", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:51:08.705032Z", - "iopub.status.busy": "2024-04-12T18:51:08.704683Z", - "iopub.status.idle": "2024-04-12T18:51:08.708401Z", - "shell.execute_reply": "2024-04-12T18:51:08.707820Z" + "iopub.execute_input": "2024-04-15T15:53:25.781050Z", + "iopub.status.busy": "2024-04-15T15:53:25.780658Z", + "iopub.status.idle": "2024-04-15T15:53:25.784432Z", + "shell.execute_reply": "2024-04-15T15:53:25.783798Z" } }, "outputs": [ @@ -126,7 +126,7 @@ }, { "cell_type": "markdown", - "id": "8d5c035e", + "id": "a48f6360", "metadata": {}, "source": [ "We can execute our component via `torchx run`. The\n", @@ -136,13 +136,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "775d8d72", + "id": "ecf157d4", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:51:08.710786Z", - "iopub.status.busy": "2024-04-12T18:51:08.710589Z", - "iopub.status.idle": "2024-04-12T18:51:10.064278Z", - "shell.execute_reply": "2024-04-12T18:51:10.063501Z" + "iopub.execute_input": "2024-04-15T15:53:25.786849Z", + "iopub.status.busy": "2024-04-15T15:53:25.786473Z", + "iopub.status.idle": "2024-04-15T15:53:27.128378Z", + "shell.execute_reply": "2024-04-15T15:53:27.127715Z" } }, "outputs": [ @@ -150,28 +150,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:51:09 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:53:26 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:51:09 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-04-15 15:53:26 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:51:09 INFO Log directory is: /tmp/torchx_3g6fjg0a\n" + "torchx 2024-04-15 15:53:26 INFO Log directory is: /tmp/torchx_mh72sa54\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:51:09 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:53:26 INFO Waiting for the app to finish...\n" ] }, { @@ -185,14 +185,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:51:10 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:53:27 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/hello_world-wclp7r2vht0fcd\n" + "local_cwd://torchx/hello_world-tx0hp3qlcb737\n" ] } ], @@ -203,7 +203,7 @@ }, { "cell_type": "markdown", - "id": "e3075413", + "id": "0e41932c", "metadata": {}, "source": [ "If we want to run in other environments, we can build a Docker container so we\n", @@ -221,13 +221,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "6558874f", + "id": "2030d00b", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:51:10.067406Z", - "iopub.status.busy": "2024-04-12T18:51:10.067144Z", - "iopub.status.idle": "2024-04-12T18:51:10.071243Z", - "shell.execute_reply": "2024-04-12T18:51:10.070627Z" + "iopub.execute_input": "2024-04-15T15:53:27.131351Z", + "iopub.status.busy": "2024-04-15T15:53:27.130925Z", + "iopub.status.idle": "2024-04-15T15:53:27.135374Z", + "shell.execute_reply": "2024-04-15T15:53:27.134763Z" } }, "outputs": [ @@ -249,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "c2cc1722", + "id": "fbd0a5ae", "metadata": {}, "source": [ "Once we have the Dockerfile created we can create our docker image." @@ -258,13 +258,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "8f847d0b", + "id": "39e30dd8", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:51:10.073674Z", - "iopub.status.busy": "2024-04-12T18:51:10.073226Z", - "iopub.status.idle": "2024-04-12T18:52:40.389799Z", - "shell.execute_reply": "2024-04-12T18:52:40.389175Z" + "iopub.execute_input": "2024-04-15T15:53:27.137707Z", + "iopub.status.busy": "2024-04-15T15:53:27.137413Z", + "iopub.status.idle": "2024-04-15T15:55:06.231916Z", + "shell.execute_reply": "2024-04-15T15:55:06.231256Z" } }, "outputs": [ @@ -349,7 +349,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#3 DONE 0.3s\n" + "#3 DONE 0.4s\n" ] }, { @@ -412,91 +412,91 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 1.05MB / 26.70MB 0.1s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0B / 26.70MB 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 0B / 857B 0.1s\n" + "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3 3.25kB / 3.25kB done\n" + "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 0B / 189B 0.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c 7.21kB / 7.21kB done\n" + "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.1s done\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 20.97MB / 26.70MB 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 9.94MB / 9.94MB 0.2s done\n" + "#5 sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f 189B / 189B 0.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 20.97MB / 26.70MB 0.3s\n" + "#5 sha256:a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3 3.25kB / 3.25kB done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726 857B / 857B 0.1s done\n" + "#5 sha256:3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c 7.21kB / 7.21kB done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 10.49MB / 2.00GB 0.3s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 0B / 2.00GB 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.3s done\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 0B / 132B 0.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0B / 21.46MB 0.3s\n" + "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 26.70MB / 26.70MB 0.4s done\n" + "#5 sha256:eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa 132B / 132B 0.3s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 4.19MB / 21.46MB 0.4s\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 0.1s\n" ] }, { @@ -510,21 +510,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0B / 21.46MB 0.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 17.85MB / 21.46MB 0.5s\n" + "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 0.4s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77de 257B / 257B 0.4s done\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 8.39MB / 21.46MB 0.5s\n" ] }, { @@ -538,98 +538,112 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.5s done\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 0.6s\n" + "#5 sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 21.46MB / 21.46MB 0.6s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 0.6s done\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 0B / 352B 0.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 0.6s done\n" + "#5 sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 92B / 92B 0.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 0.7s\n" + "#5 sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2 352B / 352B 0.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 18.87MB / 341.29MB 0.8s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 0B / 341.29MB 0.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 36.70MB / 341.29MB 0.9s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 101.74MB / 2.00GB 1.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 55.57MB / 341.29MB 1.1s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 27.27MB / 341.29MB 1.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 116.39MB / 2.00GB 1.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 100.66MB / 1.71GB 1.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 111.15MB / 1.71GB 1.3s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 48.23MB / 341.29MB 1.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 78.64MB / 341.29MB 1.3s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 75.50MB / 341.29MB 1.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 106.54MB / 341.29MB 1.5s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 210.76MB / 2.00GB 1.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 1.4s done\n" + "#5 extracting sha256:4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca 1.3s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 137.36MB / 341.29MB 1.8s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 102.76MB / 341.29MB 1.7s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "#5 extracting sha256:d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 131.07MB / 341.29MB 1.9s\n" ] }, { @@ -643,350 +657,350 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 191.89MB / 1.71GB 2.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 243.27MB / 2.00GB 2.0s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 163.58MB / 341.29MB 2.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 222.30MB / 1.71GB 2.0s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 191.92MB / 341.29MB 2.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 162.53MB / 341.29MB 2.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 325.06MB / 2.00GB 2.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 223.35MB / 341.29MB 2.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 195.04MB / 341.29MB 2.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 292.55MB / 1.71GB 2.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 222.30MB / 341.29MB 2.4s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 253.76MB / 341.29MB 2.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 246.97MB / 341.29MB 2.5s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 287.31MB / 341.29MB 2.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 353.37MB / 2.00GB 2.7s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 427.82MB / 2.00GB 3.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 337.64MB / 1.71GB 2.7s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 310.38MB / 341.29MB 3.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 285.21MB / 341.29MB 2.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 385.88MB / 1.71GB 3.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 317.72MB / 341.29MB 2.9s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 337.64MB / 341.29MB 3.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 3.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 528.48MB / 2.00GB 4.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 478.15MB / 2.00GB 3.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 482.34MB / 1.71GB 4.3s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 427.82MB / 1.71GB 3.5s\n" + "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 4.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 592.45MB / 2.00GB 3.8s\n" + "#5 extracting sha256:889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 341.29MB / 341.29MB 3.7s done\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 4.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 0B / 563.38kB 4.0s\n" + "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 4.7s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 4.1s\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 542.11MB / 1.71GB 4.3s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 0B / 556.96kB 4.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800 563.38kB / 563.38kB 4.1s done\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 5.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 725.61MB / 2.00GB 4.6s\n" + "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 5.2s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 0B / 556.96kB 5.0s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 579.86MB / 1.71GB 5.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 651.17MB / 1.71GB 5.2s\n" + "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 1.1s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968 556.96kB / 556.96kB 5.0s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 631.24MB / 2.00GB 6.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 838.86MB / 2.00GB 5.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 666.89MB / 1.71GB 6.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 940.57MB / 2.00GB 5.8s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 760.22MB / 1.71GB 6.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 759.17MB / 1.71GB 5.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 734.00MB / 2.00GB 7.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.04GB / 2.00GB 6.2s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 858.78MB / 1.71GB 7.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 868.22MB / 1.71GB 6.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 956.30MB / 1.71GB 7.8s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.15GB / 2.00GB 7.0s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 849.35MB / 2.00GB 8.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 960.50MB / 1.71GB 7.0s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.06GB / 1.71GB 8.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.1s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.15GB / 1.71GB 8.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.26GB / 2.00GB 7.5s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.26GB / 1.71GB 9.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.06GB / 1.71GB 7.7s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.36GB / 1.71GB 10.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.38GB / 2.00GB 8.2s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 950.01MB / 2.00GB 10.1s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203 0.8s done\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.46GB / 1.71GB 10.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.17GB / 1.71GB 8.4s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.56GB / 1.71GB 11.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.49GB / 2.00GB 9.0s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.69GB / 1.71GB 11.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.29GB / 1.71GB 9.5s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.05GB / 2.00GB 12.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.60GB / 2.00GB 9.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.16GB / 2.00GB 13.9s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.39GB / 1.71GB 10.3s\n" + "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 13.9s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.71GB / 2.00GB 10.5s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.27GB / 2.00GB 15.0s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.52GB / 1.71GB 10.8s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.39GB / 2.00GB 16.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.82GB / 2.00GB 11.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.50GB / 2.00GB 17.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.60GB / 1.71GB 11.1s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.61GB / 2.00GB 18.6s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.92GB / 2.00GB 11.7s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.71GB / 2.00GB 19.5s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 11.7s\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.82GB / 2.00GB 20.4s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 1.71GB / 1.71GB 12.3s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 1.94GB / 2.00GB 21.2s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 13.1s done\n" + "#5 sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 2.00GB / 2.00GB 22.3s done\n" ] }, { @@ -1035,7 +1049,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.1s done\n" + "#5 extracting sha256:143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907 30.2s done\n" ] }, { @@ -1063,7 +1077,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.8s done\n" + "#5 extracting sha256:d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71 0.7s done\n" ] }, { @@ -1091,7 +1105,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 10.1s\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 10.2s\n" ] }, { @@ -1105,21 +1119,21 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 18.8s done\n" + "#5 extracting sha256:f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6 18.9s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932\n" + "#5 extracting sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932 done\n" + "#5 extracting sha256:30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2\n" ] }, { @@ -1147,14 +1161,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 10.6s\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 10.7s\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 12.2s done\n" + "#5 extracting sha256:909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233 12.1s done\n" ] }, { @@ -1182,7 +1196,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#5 DONE 82.5s\n" + "#5 DONE 91.8s\n" ] }, { @@ -1231,14 +1245,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "#7 exporting layers 2.6s done\n" + "#7 exporting layers 2.5s done\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "#7 writing image sha256:37c8ad67dc0eb8d93c8dbc9f62edf9534c8e0cda904ae2f8716f235bfb9b5b78 done\n" + "#7 writing image sha256:5411b2b2206f1dc6d9eccb60e0d3ac0f65bdfc9aa2dbb325a18451a61777439b done\n" ] }, { @@ -1252,7 +1266,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "#7 DONE 2.6s\n" + "#7 DONE 2.5s\n" ] } ], @@ -1263,7 +1277,7 @@ }, { "cell_type": "markdown", - "id": "52b8effd", + "id": "ea62e0c8", "metadata": {}, "source": [ "We can then launch it on the local scheduler." @@ -1272,13 +1286,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "4d920549", + "id": "66f10396", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:52:40.392729Z", - "iopub.status.busy": "2024-04-12T18:52:40.392325Z", - "iopub.status.idle": "2024-04-12T18:52:49.109859Z", - "shell.execute_reply": "2024-04-12T18:52:49.109214Z" + "iopub.execute_input": "2024-04-15T15:55:06.234657Z", + "iopub.status.busy": "2024-04-15T15:55:06.234391Z", + "iopub.status.idle": "2024-04-15T15:55:14.631548Z", + "shell.execute_reply": "2024-04-15T15:55:14.630911Z" } }, "outputs": [ @@ -1286,133 +1300,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:40 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:55:06 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:40 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-04-15 15:55:06 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:40 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-04-15 15:55:06 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-04-15 15:55:07 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.43/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" + "torchx 2024-04-15 15:55:07 WARNING failed to pull image my_app:latest, falling back to local: 404 Client Error for http+docker://localhost/v1.43/images/create?tag=latest&fromImage=my_app: Not Found (\"pull access denied for my_app, repository does not exist or may require 'docker login': denied: requested access to the resource is denied\")\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-04-15 15:55:07 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-04-15 15:55:07 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-04-15 15:55:07 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO ---> 37c8ad67dc0e\n" + "torchx 2024-04-15 15:55:07 INFO ---> 5411b2b2206f\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:41 INFO Step 3/4 : COPY . .\n" + "torchx 2024-04-15 15:55:07 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:44 INFO ---> 97fb15451eb2\n" + "torchx 2024-04-15 15:55:10 INFO ---> b196fd38cde5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:44 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" + "torchx 2024-04-15 15:55:10 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:44 INFO ---> Running in 76c2ffe7a3b9\n" + "torchx 2024-04-15 15:55:10 INFO ---> Running in f08bf61d5238\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:47 INFO Removing intermediate container 76c2ffe7a3b9\n" + "torchx 2024-04-15 15:55:12 INFO Removing intermediate container f08bf61d5238\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:47 INFO ---> e3aab59db1ca\n" + "torchx 2024-04-15 15:55:12 INFO ---> 32818bcb65f3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:47 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-04-15 15:55:12 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:47 INFO Successfully built e3aab59db1ca\n" + "torchx 2024-04-15 15:55:12 INFO Successfully built 32818bcb65f3\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:47 INFO Built new image `sha256:e3aab59db1ca90e0225e17ef40be1c9ee397ae14b0b4d98e3f8d160553c8005d` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" + "torchx 2024-04-15 15:55:12 INFO Built new image `sha256:32818bcb65f3422eb5378f3bc1dd95f50308e4ef5faf3f31e0329bf2f6b5b345` based on original image `my_app:latest` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=greeter.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:48 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:55:13 INFO Waiting for the app to finish...\n" ] }, { @@ -1426,14 +1440,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:49 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:55:14 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/hello_world-krph0z37pmr4\n" + "local_docker://torchx/hello_world-v94dh1tjtmjxlc\n" ] } ], @@ -1444,7 +1458,7 @@ }, { "cell_type": "markdown", - "id": "372c79a9", + "id": "0b50706c", "metadata": {}, "source": [ "If you have a Kubernetes cluster you can use the [Kubernetes scheduler](schedulers/kubernetes.rst) to launch\n", @@ -1453,7 +1467,7 @@ }, { "cell_type": "markdown", - "id": "c282448c", + "id": "b579d40b", "metadata": { "lines_to_next_cell": 2, "region_name": "md" @@ -1467,7 +1481,7 @@ }, { "cell_type": "markdown", - "id": "e9248428", + "id": "de9d7c5d", "metadata": {}, "source": [ "## Builtins\n", @@ -1479,13 +1493,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "ceeda169", + "id": "ff576968", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:52:49.112916Z", - "iopub.status.busy": "2024-04-12T18:52:49.112578Z", - "iopub.status.idle": "2024-04-12T18:52:50.032507Z", - "shell.execute_reply": "2024-04-12T18:52:50.031847Z" + "iopub.execute_input": "2024-04-15T15:55:14.634818Z", + "iopub.status.busy": "2024-04-15T15:55:14.634331Z", + "iopub.status.idle": "2024-04-15T15:55:15.535897Z", + "shell.execute_reply": "2024-04-15T15:55:15.535267Z" } }, "outputs": [ @@ -1581,7 +1595,7 @@ }, { "cell_type": "markdown", - "id": "f3039d10", + "id": "30750d2a", "metadata": {}, "source": [ "You can use these either from the CLI, from a pipeline or programmatically like\n", @@ -1591,13 +1605,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "679f1e6d", + "id": "1f747466", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:52:50.035388Z", - "iopub.status.busy": "2024-04-12T18:52:50.035104Z", - "iopub.status.idle": "2024-04-12T18:55:19.310927Z", - "shell.execute_reply": "2024-04-12T18:55:19.310031Z" + "iopub.execute_input": "2024-04-15T15:55:15.538525Z", + "iopub.status.busy": "2024-04-15T15:55:15.538251Z", + "iopub.status.idle": "2024-04-15T15:57:45.946274Z", + "shell.execute_reply": "2024-04-15T15:57:45.945501Z" } }, "outputs": [ @@ -1605,133 +1619,133 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:51 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:55:16 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:51 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-04-15 15:55:16 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:51 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-04-15 15:55:16 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:52:51 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-04-15 15:55:16 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:09 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-04-15 15:57:36 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:10 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-04-15 15:57:36 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:10 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-04-15 15:57:36 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:10 INFO ---> b3be52588e18\n" + "torchx 2024-04-15 15:57:36 INFO ---> 803d8840e4a9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:10 INFO Step 3/4 : COPY . .\n" + "torchx 2024-04-15 15:57:36 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:14 INFO ---> 70cc4f72a637\n" + "torchx 2024-04-15 15:57:41 INFO ---> bdf53e9b5509\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:14 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" + "torchx 2024-04-15 15:57:41 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:14 INFO ---> Running in 1e51f706f9a3\n" + "torchx 2024-04-15 15:57:41 INFO ---> Running in 2b7101fd3b19\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:18 INFO Removing intermediate container 1e51f706f9a3\n" + "torchx 2024-04-15 15:57:45 INFO Removing intermediate container 2b7101fd3b19\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:18 INFO ---> 76e96c1c09fb\n" + "torchx 2024-04-15 15:57:45 INFO ---> 2b432f1ccee8\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:18 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-04-15 15:57:45 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:18 INFO Successfully built 76e96c1c09fb\n" + "torchx 2024-04-15 15:57:45 INFO Successfully built 2b432f1ccee8\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:18 INFO Built new image `sha256:76e96c1c09fbb481889f1b8ca0665394f0100c90b1c6a87c32839fadaad77f3e` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" + "torchx 2024-04-15 15:57:45 INFO Built new image `sha256:2b432f1ccee87051113dd2e0a651be0bb9035661f30a35717df9eea552bbed06` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=echo.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:19 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:57:45 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:19 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:57:45 INFO Job finished: SUCCEEDED\n" ] }, { @@ -1745,7 +1759,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/echo-c75hw600v5g7bc\n" + "local_docker://torchx/echo-z0cz2jdx6c51vc\n" ] } ], diff --git a/0.7.0dev0/pipelines/airflow.html b/0.7.0dev0/pipelines/airflow.html index 57722ef13..900fbbbf6 100644 --- a/0.7.0dev0/pipelines/airflow.html +++ b/0.7.0dev0/pipelines/airflow.html @@ -491,7 +491,7 @@

Airflow
-
/tmp/ipykernel_4183/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
+
/tmp/ipykernel_4218/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.
 

If all goes well you should see Hello, TorchX! printed above.

diff --git a/0.7.0dev0/pipelines/airflow.ipynb b/0.7.0dev0/pipelines/airflow.ipynb index 499d03434..0223cff3f 100644 --- a/0.7.0dev0/pipelines/airflow.ipynb +++ b/0.7.0dev0/pipelines/airflow.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "d27fbbca", + "id": "9397b1e7", "metadata": {}, "source": [ "# Airflow\n", @@ -18,13 +18,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "dae50d3b", + "id": "e7ddc7a5", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:22.295360Z", - "iopub.status.busy": "2024-04-12T18:55:22.294960Z", - "iopub.status.idle": "2024-04-12T18:55:22.918222Z", - "shell.execute_reply": "2024-04-12T18:55:22.917566Z" + "iopub.execute_input": "2024-04-15T15:57:48.629478Z", + "iopub.status.busy": "2024-04-15T15:57:48.628910Z", + "iopub.status.idle": "2024-04-15T15:57:49.232172Z", + "shell.execute_reply": "2024-04-15T15:57:49.231441Z" } }, "outputs": [], @@ -44,7 +44,7 @@ }, { "cell_type": "markdown", - "id": "7620bb1f", + "id": "841c1c83", "metadata": {}, "source": [ "To launch a TorchX job from Airflow you can create a Airflow Python task to\n", @@ -56,13 +56,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "06572f7f", + "id": "938c7aae", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:22.921183Z", - "iopub.status.busy": "2024-04-12T18:55:22.920877Z", - "iopub.status.idle": "2024-04-12T18:55:22.925702Z", - "shell.execute_reply": "2024-04-12T18:55:22.925120Z" + "iopub.execute_input": "2024-04-15T15:57:49.235380Z", + "iopub.status.busy": "2024-04-15T15:57:49.234817Z", + "iopub.status.idle": "2024-04-15T15:57:49.239749Z", + "shell.execute_reply": "2024-04-15T15:57:49.239136Z" } }, "outputs": [], @@ -93,7 +93,7 @@ }, { "cell_type": "markdown", - "id": "dac8b5fd", + "id": "07521161", "metadata": {}, "source": [ "Once we have the task defined we can put it into a Airflow DAG and run it like\n", @@ -103,24 +103,24 @@ { "cell_type": "code", "execution_count": 3, - "id": "1c4d5bc4", + "id": "bf91d38d", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:22.928327Z", - "iopub.status.busy": "2024-04-12T18:55:22.927947Z", - "iopub.status.idle": "2024-04-12T18:55:24.203324Z", - "shell.execute_reply": "2024-04-12T18:55:24.202566Z" + "iopub.execute_input": "2024-04-15T15:57:49.242104Z", + "iopub.status.busy": "2024-04-15T15:57:49.241769Z", + "iopub.status.idle": "2024-04-15T15:57:50.499631Z", + "shell.execute_reply": "2024-04-15T15:57:50.499033Z" } }, "outputs": [ { "data": { "text/html": [ - "
/tmp/ipykernel_4183/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
+       "
/tmp/ipykernel_4218/454499020.py:3 RemovedInAirflow3Warning: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\n",
        "
\n" ], "text/plain": [ - "\u001b[1;33m/tmp/ipykernel_4183/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" + "\u001b[1;33m/tmp/ipykernel_4218/\u001b[0m\u001b[1;33m454499020.py\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m3\u001b[0m\u001b[1;33m RemovedInAirflow3Warning\u001b[0m\u001b[33m: Param `schedule_interval` is deprecated and will be removed in a future release. Please use `schedule` instead.\u001b[0m\n" ] }, "metadata": {}, @@ -130,70 +130,70 @@ "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.143+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2073} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.440+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2073} INFO\u001b[0m - Dependencies all met for dep_context=non-requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.148+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2073} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.446+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2073} INFO\u001b[0m - Dependencies all met for dep_context=requeueable deps ti=\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.149+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2303} INFO\u001b[0m - Starting attempt 1 of 1\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.447+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2303} INFO\u001b[0m - Starting attempt 1 of 1\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.150+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2385} WARNING\u001b[0m - cannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.447+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2385} WARNING\u001b[0m - cannot record queued_duration for task hello_torchx because previous state change time has not been saved\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.160+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2327} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.457+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2327} INFO\u001b[0m - Executing on 2021-09-13 00:00:00+00:00\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.420+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2644} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-gqq75v6pktlbtc' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.701+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m2644} INFO\u001b[0m - Exporting env vars: AIRFLOW_CTX_DAG_OWNER='airflow' AIRFLOW_CTX_DAG_ID='example_python_operator-d3ghzhj3zbttmd' AIRFLOW_CTX_TASK_ID='hello_torchx' AIRFLOW_CTX_EXECUTION_DATE='2021-09-13T00:00:00+00:00' AIRFLOW_CTX_TRY_NUMBER='1' AIRFLOW_CTX_DAG_RUN_ID='manual__2021-09-13T00:00:00+00:00'\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:23.423+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m430} INFO\u001b[0m - ::endgroup::\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:49.703+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m430} INFO\u001b[0m - ::endgroup::\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:24.072+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.375+0000\u001b[0m] {\u001b[34mapi.py:\u001b[0m72} INFO\u001b[0m - Tracker configurations: {}\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:24.075+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.379+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m771} INFO\u001b[0m - Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:24.076+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_l_vf3y29\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.380+0000\u001b[0m] {\u001b[34mlocal_scheduler.py:\u001b[0m777} INFO\u001b[0m - Log directory is: /tmp/torchx_p5ojxbdz\u001b[0m\n" ] }, { @@ -201,21 +201,21 @@ "output_type": "stream", "text": [ "Hello, TorchX!\n", - "[\u001b[34m2024-04-12T18:55:24.185+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m237} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.486+0000\u001b[0m] {\u001b[34mpython.py:\u001b[0m237} INFO\u001b[0m - Done. Returned value was: None\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:24.186+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m441} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.487+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m441} INFO\u001b[0m - ::group::Post task execution logs\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "[\u001b[34m2024-04-12T18:55:24.193+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m1205} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-gqq75v6pktlbtc, task_id=hello_torchx, execution_date=20210913T000000, start_date=20240412T185523, end_date=20240412T185524\u001b[0m\n" + "[\u001b[34m2024-04-15T15:57:50.491+0000\u001b[0m] {\u001b[34mtaskinstance.py:\u001b[0m1205} INFO\u001b[0m - Marking task as SUCCESS. dag_id=example_python_operator-d3ghzhj3zbttmd, task_id=hello_torchx, execution_date=20210913T000000, start_date=20240415T155749, end_date=20240415T155750\u001b[0m\n" ] } ], @@ -247,7 +247,7 @@ }, { "cell_type": "markdown", - "id": "0f2c3432", + "id": "3c05f743", "metadata": {}, "source": [ "If all goes well you should see `Hello, TorchX!` printed above.\n", diff --git a/0.7.0dev0/quickstart.html b/0.7.0dev0/quickstart.html index a3e9e1891..41f44ad75 100644 --- a/0.7.0dev0/quickstart.html +++ b/0.7.0dev0/quickstart.html @@ -887,7 +887,7 @@

Launching
-torchx 2024-04-12 18:55:28 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:57:54 INFO     Tracker configurations: {}
 

We can run the exact same app via the local_docker scheduler. This scheduler will package up the local workspace as a layer on top of the specified image. This provides a very similar environment to the container based remote schedulers.

@@ -955,7 +955,7 @@

Launching
-torchx 2024-04-12 18:55:30 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:57:56 INFO     Tracker configurations: {}
 

TorchX defaults to using the ghcr.io/pytorch/torchx Docker container image which contains the PyTorch libraries, TorchX and related dependencies.

@@ -1630,7 +1630,7 @@

Distributed
-torchx 2024-04-12 18:55:43 INFO     Tracker configurations: {}
+torchx 2024-04-15 15:58:09 INFO     Tracker configurations: {}
 
@@ -4040,7 +4040,7 @@

Docker-based Schedulers
-torchx 2024-04-12 18:56:02 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
+torchx 2024-04-15 15:58:28 INFO     loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig
 
diff --git a/0.7.0dev0/quickstart.ipynb b/0.7.0dev0/quickstart.ipynb index 1c08d4d17..54fced457 100644 --- a/0.7.0dev0/quickstart.ipynb +++ b/0.7.0dev0/quickstart.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "27a4de49", + "id": "849de2c9", "metadata": {}, "source": [ "# Quickstart\n", @@ -18,7 +18,7 @@ }, { "cell_type": "markdown", - "id": "35f8d6ad", + "id": "ae9e5d55", "metadata": { "region_name": "md" }, @@ -31,7 +31,7 @@ }, { "cell_type": "markdown", - "id": "7a08df1e", + "id": "a34ce022", "metadata": {}, "source": [ "See the [README](https://github.com/pytorch/torchx) for more\n", @@ -41,13 +41,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "5848e7f6", + "id": "5d1ec617", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:26.305020Z", - "iopub.status.busy": "2024-04-12T18:55:26.304504Z", - "iopub.status.idle": "2024-04-12T18:55:26.584244Z", - "shell.execute_reply": "2024-04-12T18:55:26.583511Z" + "iopub.execute_input": "2024-04-15T15:57:52.738385Z", + "iopub.status.busy": "2024-04-15T15:57:52.737877Z", + "iopub.status.idle": "2024-04-15T15:57:53.016811Z", + "shell.execute_reply": "2024-04-15T15:57:53.016010Z" } }, "outputs": [ @@ -171,7 +171,7 @@ }, { "cell_type": "markdown", - "id": "90dffd9e", + "id": "1d5e7dd5", "metadata": {}, "source": [ "## Hello World\n", @@ -189,13 +189,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "02343225", + "id": "1cf0f552", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:26.587053Z", - "iopub.status.busy": "2024-04-12T18:55:26.586706Z", - "iopub.status.idle": "2024-04-12T18:55:26.591145Z", - "shell.execute_reply": "2024-04-12T18:55:26.590526Z" + "iopub.execute_input": "2024-04-15T15:57:53.020755Z", + "iopub.status.busy": "2024-04-15T15:57:53.020416Z", + "iopub.status.idle": "2024-04-15T15:57:53.024930Z", + "shell.execute_reply": "2024-04-15T15:57:53.024371Z" } }, "outputs": [ @@ -217,7 +217,7 @@ }, { "cell_type": "markdown", - "id": "1b578e0a", + "id": "3b4c0802", "metadata": {}, "source": [ "## Launching\n", @@ -231,13 +231,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "9480ed15", + "id": "a68228be", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:26.593604Z", - "iopub.status.busy": "2024-04-12T18:55:26.593178Z", - "iopub.status.idle": "2024-04-12T18:55:27.547676Z", - "shell.execute_reply": "2024-04-12T18:55:27.546946Z" + "iopub.execute_input": "2024-04-15T15:57:53.027345Z", + "iopub.status.busy": "2024-04-15T15:57:53.026952Z", + "iopub.status.idle": "2024-04-15T15:57:53.966300Z", + "shell.execute_reply": "2024-04-15T15:57:53.965503Z" } }, "outputs": [ @@ -494,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "f36fe3bb", + "id": "062b7ba6", "metadata": {}, "source": [ "The component takes in the script name and any extra arguments will be passed to\n", @@ -504,13 +504,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "5c74ad10", + "id": "47c4d663", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:27.552475Z", - "iopub.status.busy": "2024-04-12T18:55:27.552257Z", - "iopub.status.idle": "2024-04-12T18:55:29.522405Z", - "shell.execute_reply": "2024-04-12T18:55:29.521747Z" + "iopub.execute_input": "2024-04-15T15:57:53.973289Z", + "iopub.status.busy": "2024-04-15T15:57:53.972890Z", + "iopub.status.idle": "2024-04-15T15:57:55.894341Z", + "shell.execute_reply": "2024-04-15T15:57:55.893732Z" } }, "outputs": [ @@ -518,28 +518,28 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:28 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:57:54 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:28 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" + "torchx 2024-04-15 15:57:54 INFO Log directory not set in scheduler cfg. Creating a temporary log dir that will be deleted on exit. To preserve log directory set the `log_dir` cfg option\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:28 INFO Log directory is: /tmp/torchx_9t2ssdcr\n" + "torchx 2024-04-15 15:57:54 INFO Log directory is: /tmp/torchx_fj9jio7k\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:28 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:57:54 INFO Waiting for the app to finish...\n" ] }, { @@ -553,14 +553,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:29 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:57:55 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_cwd://torchx/torchx_utils_python-jwhx7f9l94xtz\n" + "local_cwd://torchx/torchx_utils_python-nn99tqg2cmn6r\n" ] } ], @@ -571,7 +571,7 @@ }, { "cell_type": "markdown", - "id": "0977a5c5", + "id": "1e58827e", "metadata": {}, "source": [ "We can run the exact same app via the `local_docker` scheduler. This scheduler\n", @@ -590,13 +590,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "b0646ed8", + "id": "684a47c8", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:29.525656Z", - "iopub.status.busy": "2024-04-12T18:55:29.525143Z", - "iopub.status.idle": "2024-04-12T18:55:40.904638Z", - "shell.execute_reply": "2024-04-12T18:55:40.903968Z" + "iopub.execute_input": "2024-04-15T15:57:55.897048Z", + "iopub.status.busy": "2024-04-15T15:57:55.896725Z", + "iopub.status.idle": "2024-04-15T15:58:07.086344Z", + "shell.execute_reply": "2024-04-15T15:58:07.085694Z" } }, "outputs": [ @@ -604,126 +604,126 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:57:56 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-04-15 15:57:56 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-04-15 15:57:56 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-04-15 15:57:56 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-04-15 15:57:57 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-04-15 15:57:57 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-04-15 15:57:57 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO ---> b3be52588e18\n" + "torchx 2024-04-15 15:57:57 INFO ---> 803d8840e4a9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:30 INFO Step 3/4 : COPY . .\n" + "torchx 2024-04-15 15:57:57 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:35 INFO ---> 6aaa794c60af\n" + "torchx 2024-04-15 15:58:01 INFO ---> 695d2846818e\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:35 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" + "torchx 2024-04-15 15:58:01 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:35 INFO ---> Running in 4e5d8a0f0434\n" + "torchx 2024-04-15 15:58:01 INFO ---> Running in 2fa7d3bbf02a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO Removing intermediate container 4e5d8a0f0434\n" + "torchx 2024-04-15 15:58:05 INFO Removing intermediate container 2fa7d3bbf02a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO ---> ee2bea9b99ba\n" + "torchx 2024-04-15 15:58:05 INFO ---> 95856f7d6c9a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-04-15 15:58:05 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO Successfully built ee2bea9b99ba\n" + "torchx 2024-04-15 15:58:05 INFO Successfully built 95856f7d6c9a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO Built new image `sha256:ee2bea9b99badfc6a0e142601f3b6dd422502658577dc700fe23f591753f6201` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-04-15 15:58:05 INFO Built new image `sha256:95856f7d6c9a9cb1d83b8e6a398b164800d994de4ffbc76c8064e48c6a48f27e` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:39 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:58:06 INFO Waiting for the app to finish...\n" ] }, { @@ -737,14 +737,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:40 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:58:07 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-ll7d9chml5wpkc\n" + "local_docker://torchx/torchx_utils_python-mpzvzd0kr3lf9c\n" ] } ], @@ -755,7 +755,7 @@ }, { "cell_type": "markdown", - "id": "405ff6a9", + "id": "71e90592", "metadata": {}, "source": [ "TorchX defaults to using the\n", @@ -773,13 +773,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "6736f954", + "id": "7a9ba49a", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:40.907748Z", - "iopub.status.busy": "2024-04-12T18:55:40.907342Z", - "iopub.status.idle": "2024-04-12T18:55:41.960669Z", - "shell.execute_reply": "2024-04-12T18:55:41.959861Z" + "iopub.execute_input": "2024-04-15T15:58:07.089170Z", + "iopub.status.busy": "2024-04-15T15:58:07.088917Z", + "iopub.status.idle": "2024-04-15T15:58:08.145008Z", + "shell.execute_reply": "2024-04-15T15:58:08.144285Z" } }, "outputs": [ @@ -1190,7 +1190,7 @@ }, { "cell_type": "markdown", - "id": "3c5b1f73", + "id": "39f98ff0", "metadata": {}, "source": [ "Lets create a slightly more interesting app to leverage the TorchX distributed\n", @@ -1200,13 +1200,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "7a464b6b", + "id": "0f737532", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:41.969531Z", - "iopub.status.busy": "2024-04-12T18:55:41.969002Z", - "iopub.status.idle": "2024-04-12T18:55:41.973494Z", - "shell.execute_reply": "2024-04-12T18:55:41.972897Z" + "iopub.execute_input": "2024-04-15T15:58:08.152878Z", + "iopub.status.busy": "2024-04-15T15:58:08.152616Z", + "iopub.status.idle": "2024-04-15T15:58:08.156930Z", + "shell.execute_reply": "2024-04-15T15:58:08.156317Z" } }, "outputs": [ @@ -1234,7 +1234,7 @@ }, { "cell_type": "markdown", - "id": "0f508a29", + "id": "8740c2ea", "metadata": {}, "source": [ "Let launch a small job with 2 nodes and 2 worker processes per node:" @@ -1243,13 +1243,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "4f50ed8f", + "id": "86f519ed", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:55:41.976056Z", - "iopub.status.busy": "2024-04-12T18:55:41.975699Z", - "iopub.status.idle": "2024-04-12T18:56:00.588680Z", - "shell.execute_reply": "2024-04-12T18:56:00.588029Z" + "iopub.execute_input": "2024-04-15T15:58:08.159373Z", + "iopub.status.busy": "2024-04-15T15:58:08.158944Z", + "iopub.status.idle": "2024-04-15T15:58:26.558666Z", + "shell.execute_reply": "2024-04-15T15:58:26.558025Z" } }, "outputs": [ @@ -1257,182 +1257,182 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:58:09 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-04-15 15:58:09 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-04-15 15:58:09 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-04-15 15:58:09 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-04-15 15:58:09 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Step 1/4 : ARG IMAGE\n" + "torchx 2024-04-15 15:58:09 INFO Step 1/4 : ARG IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Step 2/4 : FROM $IMAGE\n" + "torchx 2024-04-15 15:58:09 INFO Step 2/4 : FROM $IMAGE\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO ---> b3be52588e18\n" + "torchx 2024-04-15 15:58:09 INFO ---> 803d8840e4a9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:43 INFO Step 3/4 : COPY . .\n" + "torchx 2024-04-15 15:58:09 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:47 INFO ---> ea48253970d2\n" + "torchx 2024-04-15 15:58:13 INFO ---> 562e27958b97\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:47 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" + "torchx 2024-04-15 15:58:13 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:47 INFO ---> Running in 82940abe873f\n" + "torchx 2024-04-15 15:58:13 INFO ---> Running in b2fae9f9d2ae\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO Removing intermediate container 82940abe873f\n" + "torchx 2024-04-15 15:58:17 INFO Removing intermediate container b2fae9f9d2ae\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO ---> 61c5f68d0cce\n" + "torchx 2024-04-15 15:58:17 INFO ---> 86ae5bbe4c84\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" + "torchx 2024-04-15 15:58:17 INFO [Warning] One or more build-args [WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO Successfully built 61c5f68d0cce\n" + "torchx 2024-04-15 15:58:18 INFO Successfully built 86ae5bbe4c84\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO Built new image `sha256:61c5f68d0cce00660ce133362dd947508fb13e2c6029e9b83b35d7bf3ba4a10e` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" + "torchx 2024-04-15 15:58:18 INFO Built new image `sha256:86ae5bbe4c844c347143f95b42531c1e4411ad0c5e00a6cc6ef74d6a7508e2ad` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=dist_app.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:55:52 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 15:58:18 INFO Waiting for the app to finish...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-04-12 18:55:53,594] torch.distributed.run: [WARNING] \n" + "dist_app/1 [2024-04-15 15:58:19,447] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-04-12 18:55:53,594] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-04-15 15:58:19,447] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-04-12 18:55:53,594] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/1 [2024-04-15 15:58:19,447] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [2024-04-12 18:55:53,594] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/1 [2024-04-15 15:58:19,447] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-04-12 18:55:53,596] torch.distributed.run: [WARNING] \n" + "dist_app/0 [2024-04-15 15:58:19,465] torch.distributed.run: [WARNING] \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-04-12 18:55:53,596] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-04-15 15:58:19,465] torch.distributed.run: [WARNING] *****************************************\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-04-12 18:55:53,596] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" + "dist_app/0 [2024-04-15 15:58:19,465] torch.distributed.run: [WARNING] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed. \n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/1 [2024-04-12 18:55:53,596] torch.distributed.run: [WARNING] *****************************************\n" + "dist_app/0 [2024-04-15 15:58:19,465] torch.distributed.run: [WARNING] *****************************************\n" ] }, { @@ -1467,42 +1467,42 @@ "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:I am worker 1 of 4!\n" + "dist_app/0 [0]:I am worker 0 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [1]:all_reduce output = tensor([6])\n" + "dist_app/0 [0]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:I am worker 0 of 4!\n" + "dist_app/0 [1]:I am worker 1 of 4!\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "dist_app/0 [0]:all_reduce output = tensor([6])\n" + "dist_app/0 [1]:all_reduce output = tensor([6])\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:00 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 15:58:26 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/dist_app-v4b2prrc0wl4kd\n" + "local_docker://torchx/dist_app-xgb0tsflbdb5kc\n" ] } ], @@ -1513,7 +1513,7 @@ }, { "cell_type": "markdown", - "id": "6e3a713b", + "id": "63ab8a3a", "metadata": {}, "source": [ "## Workspaces / Patching\n", @@ -1542,13 +1542,13 @@ { "cell_type": "code", "execution_count": 9, - "id": "a2ebd00a", + "id": "170b6fdc", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:56:00.591969Z", - "iopub.status.busy": "2024-04-12T18:56:00.591487Z", - "iopub.status.idle": "2024-04-12T18:56:00.595864Z", - "shell.execute_reply": "2024-04-12T18:56:00.595283Z" + "iopub.execute_input": "2024-04-15T15:58:26.561541Z", + "iopub.status.busy": "2024-04-15T15:58:26.561289Z", + "iopub.status.idle": "2024-04-15T15:58:26.565398Z", + "shell.execute_reply": "2024-04-15T15:58:26.564803Z" } }, "outputs": [ @@ -1573,7 +1573,7 @@ }, { "cell_type": "markdown", - "id": "b1ce11d9", + "id": "fe1b6e54", "metadata": {}, "source": [ "## Remote Schedulers\n", @@ -1588,7 +1588,7 @@ }, { "cell_type": "markdown", - "id": "96266727", + "id": "37ec9618", "metadata": { "region_name": "md" }, @@ -1603,7 +1603,7 @@ }, { "cell_type": "markdown", - "id": "24a91642", + "id": "d72c8e31", "metadata": {}, "source": [ "Depending on the scheduler there may be a few extra configuration parameters so\n", @@ -1613,7 +1613,7 @@ }, { "cell_type": "markdown", - "id": "6a6ce835", + "id": "ca67e798", "metadata": {}, "source": [ "All config options:" @@ -1622,13 +1622,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "c1a915c3", + "id": "0a0e6dfd", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:56:00.598475Z", - "iopub.status.busy": "2024-04-12T18:56:00.598075Z", - "iopub.status.idle": "2024-04-12T18:56:02.465683Z", - "shell.execute_reply": "2024-04-12T18:56:02.465042Z" + "iopub.execute_input": "2024-04-15T15:58:26.567925Z", + "iopub.status.busy": "2024-04-15T15:58:26.567442Z", + "iopub.status.idle": "2024-04-15T15:58:28.294133Z", + "shell.execute_reply": "2024-04-15T15:58:28.293478Z" }, "lines_to_next_cell": 2 }, @@ -3398,7 +3398,7 @@ }, { "cell_type": "markdown", - "id": "168ed2e7", + "id": "11affef0", "metadata": {}, "source": [ "## Custom Images\n", @@ -3413,13 +3413,13 @@ { "cell_type": "code", "execution_count": 11, - "id": "3bdd4eee", + "id": "256660e9", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:56:02.469086Z", - "iopub.status.busy": "2024-04-12T18:56:02.468566Z", - "iopub.status.idle": "2024-04-12T18:56:02.472966Z", - "shell.execute_reply": "2024-04-12T18:56:02.472402Z" + "iopub.execute_input": "2024-04-15T15:58:28.297150Z", + "iopub.status.busy": "2024-04-15T15:58:28.296884Z", + "iopub.status.idle": "2024-04-15T15:58:28.301165Z", + "shell.execute_reply": "2024-04-15T15:58:28.300601Z" } }, "outputs": [ @@ -3442,13 +3442,13 @@ { "cell_type": "code", "execution_count": 12, - "id": "49942f68", + "id": "bcf3f05e", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:56:02.475660Z", - "iopub.status.busy": "2024-04-12T18:56:02.475219Z", - "iopub.status.idle": "2024-04-12T18:56:02.478848Z", - "shell.execute_reply": "2024-04-12T18:56:02.478206Z" + "iopub.execute_input": "2024-04-15T15:58:28.303580Z", + "iopub.status.busy": "2024-04-15T15:58:28.303123Z", + "iopub.status.idle": "2024-04-15T15:58:28.306806Z", + "shell.execute_reply": "2024-04-15T15:58:28.306196Z" } }, "outputs": [ @@ -3472,7 +3472,7 @@ }, { "cell_type": "markdown", - "id": "7420186e", + "id": "383d28d6", "metadata": {}, "source": [ "Once we have the Dockerfile created we can launch as normal and TorchX will\n", @@ -3483,13 +3483,13 @@ { "cell_type": "code", "execution_count": 13, - "id": "74ffdb01", + "id": "68bde34f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T18:56:02.481295Z", - "iopub.status.busy": "2024-04-12T18:56:02.480946Z", - "iopub.status.idle": "2024-04-12T18:57:29.638882Z", - "shell.execute_reply": "2024-04-12T18:57:29.638072Z" + "iopub.execute_input": "2024-04-15T15:58:28.309241Z", + "iopub.status.busy": "2024-04-15T15:58:28.308789Z", + "iopub.status.idle": "2024-04-15T16:00:04.687445Z", + "shell.execute_reply": "2024-04-15T16:00:04.686674Z" } }, "outputs": [ @@ -3497,357 +3497,357 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:02 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" + "torchx 2024-04-15 15:58:28 INFO loaded configs from /home/runner/work/torchx/torchx/docs/source/.torchxconfig\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO Tracker configurations: {}\n" + "torchx 2024-04-15 15:58:29 INFO Tracker configurations: {}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" + "torchx 2024-04-15 15:58:29 INFO Checking for changes in workspace `file:///home/runner/work/torchx/torchx/docs/source`...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" + "torchx 2024-04-15 15:58:29 INFO To disable workspaces pass: --workspace=\"\" from CLI or workspace=None programmatically.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" + "torchx 2024-04-15 15:58:29 INFO Workspace `file:///home/runner/work/torchx/torchx/docs/source` resolved to filesystem path `/home/runner/work/torchx/torchx/docs/source`\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO Building workspace docker image (this may take a while)...\n" + "torchx 2024-04-15 15:58:29 INFO Building workspace docker image (this may take a while)...\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:56:03 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" + "torchx 2024-04-15 15:58:29 INFO Step 1/4 : FROM pytorch/pytorch:1.10.0-cuda11.3-cudnn8-runtime\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:19 INFO ---> c3f17e5ac010\n" + "torchx 2024-04-15 15:59:54 INFO ---> c3f17e5ac010\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:19 INFO Step 2/4 : RUN pip install timm\n" + "torchx 2024-04-15 15:59:54 INFO Step 2/4 : RUN pip install timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:19 INFO ---> Running in 38bd441c7f93\n" + "torchx 2024-04-15 15:59:54 INFO ---> Running in bb20453f5871\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Collecting timm\n" + "torchx 2024-04-15 15:59:55 INFO Collecting timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" + "torchx 2024-04-15 15:59:55 INFO Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" + "torchx 2024-04-15 15:59:55 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" + "torchx 2024-04-15 15:59:55 INFO Requirement already satisfied: pyyaml in /opt/conda/lib/python3.7/site-packages (from timm) (5.4.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Requirement already satisfied: torch>=1.7 in /opt/conda/lib/python3.7/site-packages (from timm) (1.10.0)\n" + "torchx 2024-04-15 15:59:55 INFO Collecting huggingface-hub\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Collecting safetensors\n" + "torchx 2024-04-15 15:59:55 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Downloading safetensors-0.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n" + "torchx 2024-04-15 15:59:55 INFO Requirement already satisfied: torchvision in /opt/conda/lib/python3.7/site-packages (from timm) (0.11.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Collecting huggingface-hub\n" + "torchx 2024-04-15 15:59:56 INFO Collecting safetensors\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)\n" + "torchx 2024-04-15 15:59:56 INFO Downloading safetensors-0.4.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:20 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: typing_extensions in /opt/conda/lib/python3.7/site-packages (from torch>=1.7->timm) (3.10.0.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Collecting fsspec\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Collecting packaging>=20.9\n" + "torchx 2024-04-15 15:59:56 INFO Collecting packaging>=20.9\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" + "torchx 2024-04-15 15:59:56 INFO Downloading packaging-24.0-py3-none-any.whl (53 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: requests in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (2.25.1)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: tqdm>=4.42.1 in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (4.61.2)\n" + "torchx 2024-04-15 15:59:56 INFO Collecting fsspec\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Collecting importlib-metadata\n" + "torchx 2024-04-15 15:59:56 INFO Downloading fsspec-2023.1.0-py3-none-any.whl (143 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" + "torchx 2024-04-15 15:59:56 INFO Collecting importlib-metadata\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: filelock in /opt/conda/lib/python3.7/site-packages (from huggingface-hub->timm) (3.0.12)\n" + "torchx 2024-04-15 15:59:56 INFO Downloading importlib_metadata-6.7.0-py3-none-any.whl (22 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Collecting zipp>=0.5\n" + "torchx 2024-04-15 15:59:56 INFO Collecting zipp>=0.5\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" + "torchx 2024-04-15 15:59:56 INFO Downloading zipp-3.15.0-py3-none-any.whl (6.8 kB)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2021.10.8)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (1.26.6)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (2.10)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.7/site-packages (from requests->huggingface-hub->timm) (4.0.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: numpy in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (1.21.2)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" + "torchx 2024-04-15 15:59:56 INFO Requirement already satisfied: pillow!=8.3.0,>=5.3.0 in /opt/conda/lib/python3.7/site-packages (from torchvision->timm) (8.4.0)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:21 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" + "torchx 2024-04-15 15:59:56 INFO Installing collected packages: zipp, packaging, importlib-metadata, fsspec, safetensors, huggingface-hub, timm\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:22 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.2 timm-0.9.12 zipp-3.15.0\n" + "torchx 2024-04-15 15:59:57 INFO Successfully installed fsspec-2023.1.0 huggingface-hub-0.16.4 importlib-metadata-6.7.0 packaging-24.0 safetensors-0.4.2 timm-0.9.12 zipp-3.15.0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:24 INFO Removing intermediate container 38bd441c7f93\n" + "torchx 2024-04-15 15:59:59 INFO Removing intermediate container bb20453f5871\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:24 INFO ---> 2d09ae5bbdf9\n" + "torchx 2024-04-15 15:59:59 INFO ---> f4c69858960a\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:24 INFO Step 3/4 : COPY . .\n" + "torchx 2024-04-15 15:59:59 INFO Step 3/4 : COPY . .\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:25 INFO ---> 589835cf9c31\n" + "torchx 2024-04-15 16:00:01 INFO ---> c12ecbe590ae\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:25 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" + "torchx 2024-04-15 16:00:01 INFO Step 4/4 : LABEL torchx.pytorch.org/version=0.7.0dev0\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:25 INFO ---> Running in 330b754b3c31\n" + "torchx 2024-04-15 16:00:01 INFO ---> Running in 438e4363325c\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO Removing intermediate container 330b754b3c31\n" + "torchx 2024-04-15 16:00:02 INFO Removing intermediate container 438e4363325c\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO ---> 90c92dd1378a\n" + "torchx 2024-04-15 16:00:02 INFO ---> feadda048372\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" + "torchx 2024-04-15 16:00:02 INFO [Warning] One or more build-args [IMAGE WORKSPACE] were not consumed\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO Successfully built 90c92dd1378a\n" + "torchx 2024-04-15 16:00:02 INFO Successfully built feadda048372\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO Built new image `sha256:90c92dd1378a48242eb294a36046bd04d0eb95aad22ed727816b26b7be73de83` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" + "torchx 2024-04-15 16:00:02 INFO Built new image `sha256:feadda04837239fc5d424a171dc886aedd3cf45942103decea3775796c4dc8fe` based on original image `ghcr.io/pytorch/torchx:0.7.0dev0` and changes in workspace `file:///home/runner/work/torchx/torchx/docs/source` for role[0]=python.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:27 INFO Waiting for the app to finish...\n" + "torchx 2024-04-15 16:00:02 INFO Waiting for the app to finish...\n" ] }, { @@ -4610,14 +4610,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "torchx 2024-04-12 18:57:29 INFO Job finished: SUCCEEDED\n" + "torchx 2024-04-15 16:00:04 INFO Job finished: SUCCEEDED\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "local_docker://torchx/torchx_utils_python-mj0xbd3r65t2rd\n" + "local_docker://torchx/torchx_utils_python-kg0h7f7c52xbfd\n" ] } ], @@ -4628,7 +4628,7 @@ }, { "cell_type": "markdown", - "id": "4fce3ec7", + "id": "b26eb0aa", "metadata": {}, "source": [ "### Slurm\n", @@ -4639,7 +4639,7 @@ }, { "cell_type": "markdown", - "id": "91a02cc7", + "id": "088f1811", "metadata": {}, "source": [ "## Next Steps\n", diff --git a/0.7.0dev0/searchindex.js b/0.7.0dev0/searchindex.js index 954f39388..745b7f937 100644 --- a/0.7.0dev0/searchindex.js +++ b/0.7.0dev0/searchindex.js @@ -1 +1 @@ -Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 20], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": 3, "15": [3, 12, 29], "08": 3, "57": [3, 29], "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "7": [5, 7, 9, 11, 12, 29], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "04": [12, 27, 29], "12": [12, 29, 41], "18": [12, 29, 39, 40], "51": 12, "09": [12, 27], "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_3g6fjg0a": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "wclp7r2vht0fcd": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": 12, "dockerignor": [12, 47], "2b": 12, "0s": 12, "99b": 12, "3s": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "0b": 12, "94mb": 12, "1s": 12, "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "05mb": 12, "26": [12, 29], "70mb": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "857b": 12, "25kb": 12, "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "189b": 12, "2s": 12, "20": [12, 29], "97mb": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "49mb": 12, "00gb": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "21": [12, 29, 39], "46mb": 12, "4s": 12, "19mb": 12, "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "extract": [12, 16, 29, 36], "17": [12, 29], "85mb": 12, "5s": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "6s": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "29mb": 12, "7s": 12, "87mb": 12, "8s": 12, "36": 12, "9s": 12, "55": [12, 27, 29], "57mb": 12, "116": 12, "39mb": 12, "111": 12, "15mb": 12, "78": 12, "64mb": 12, "106": 12, "54mb": 12, "137": 12, "36mb": 12, "243": 12, "27mb": 12, "222": 12, "30mb": 12, "162": 12, "53mb": 12, "195": 12, "04mb": 12, "246": 12, "353": 12, "37mb": 12, "337": 12, "285": 12, "21mb": 12, "317": 12, "72mb": 12, "478": 12, "427": 12, "82mb": 12, "592": 12, "45mb": 12, "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "542": 12, "11mb": 12, "725": 12, "61mb": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "651": 12, "17mb": 12, "838": 12, "86mb": 12, "940": 12, "759": 12, "04gb": 12, "6": [12, 29, 39], "868": 12, "22mb": 12, "15gb": 12, "960": 12, "50mb": 12, "26gb": 12, "06gb": 12, "38gb": 12, "17gb": 12, "49gb": 12, "29gb": 12, "60gb": 12, "39gb": 12, "52gb": 12, "82gb": 12, "92gb": 12, "13": [12, 16, 27, 29, 40], "25": [12, 29], "30": [12, 29, 36, 43], "82": 12, "37c8ad67dc0eb8d93c8dbc9f62edf9534c8e0cda904ae2f8716f235bfb9b5b78": 12, "52": [12, 29], "40": [12, 29], "disabl": [12, 29, 31, 36, 44], "41": 12, "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "43": [12, 29], "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "37c8ad67dc0": 12, "44": 12, "97fb15451eb2": 12, "label": [12, 17, 29, 36], "76c2ffe7a3b9": 12, "47": [12, 29], "remov": [12, 27, 29, 30, 39], "e3aab59db1ca": 12, "successfulli": [12, 29, 30, 45], "e3aab59db1ca90e0225e17ef40be1c9ee397ae14b0b4d98e3f8d160553c8005d": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "48": 12, "49": 12, "krph0z37pmr4": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "b3be52588e18": [12, 29], "14": [12, 18], "70cc4f72a637": 12, "1e51f706f9a3": 12, "76e96c1c09fb": 12, "76e96c1c09fbb481889f1b8ca0665394f0100c90b1c6a87c32839fadaad77f3": 12, "19": [12, 29], "c75hw600v5g7bc": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "24": [17, 27, 29, 36, 40, 45], "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": 17, "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": 18, "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "lightningloggerbas": 19, "baseprofil": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_4183": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "12t18": 27, "23": 27, "143": [27, 29], "0000": 27, "taskinst": 27, "2073": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "gqq75v6pktlbtc": 27, "manual__2021": 27, "13t00": 27, "00": [27, 29], "148": 27, "149": 27, "2303": 27, "150": 27, "2385": 27, "queued_dur": 27, "160": 27, "2327": 27, "_pythondecoratedoper": 27, "420": 27, "2644": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_try_numb": 27, "airflow_ctx_dag_run_id": 27, "423": 27, "430": 27, "endgroup": 27, "072": 27, "72": 27, "075": 27, "local_schedul": [27, 34, 41], "771": 27, "076": 27, "777": 27, "torchx_l_vf3y29": 27, "185": 27, "237": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "186": 27, "441": 27, "post": 27, "193": 27, "1205": 27, "mark": 27, "20210913t000000": 27, "20240412t185523": 27, "end_dat": 27, "20240412t185524": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "28": 29, "torchx_9t2ssdcr": 29, "29": 29, "jwhx7f9l94xtz": 29, "35": 29, "6aaa794c60af": 29, "4e5d8a0f0434": 29, "ee2bea9b99ba": 29, "ee2bea9b99badfc6a0e142601f3b6dd422502658577dc700fe23f591753f6201": 29, "ll7d9chml5wpkc": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "ea48253970d2": 29, "82940abe873f": 29, "61c5f68d0cce": 29, "61c5f68d0cce00660ce133362dd947508fb13e2c6029e9b83b35d7bf3ba4a10": 29, "53": 29, "594": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "596": 29, "56": 29, "v4b2prrc0wl4kd": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "privileg": [29, 35, 39, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "elev": [29, 35], "permiss": [29, 35, 37, 45], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "02": 29, "03": 29, "c3f17e5ac010": 29, "38bd441c7f93": 29, "py3": 29, "whl": 29, "satisfi": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "pyyaml": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "268": 29, "kb": 29, "typing_extens": 29, "2023": 29, "42": 29, "61": 29, "importlib": 29, "importlib_metadata": 29, "22": [29, 39, 40], "filelock": 29, "zipp": 29, "chardet": 29, "certifi": 29, "2017": 29, "urllib3": 29, "27": 29, "idna": 29, "pillow": 29, "2d09ae5bbdf9": 29, "589835cf9c31": 29, "330b754b3c31": 29, "90c92dd1378a": 29, "90c92dd1378a48242eb294a36046bd04d0eb95aad22ed727816b26b7be73de83": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "05": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "mj0xbd3r65t2rd": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": 43, "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "run_id": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file +Search.setIndex({"docnames": ["advanced", "app_best_practices", "basics", "cli", "component_best_practices", "components/distributed", "components/interpret", "components/metrics", "components/overview", "components/serve", "components/train", "components/utils", "custom_components", "examples_apps/compute_world_size/main", "examples_apps/datapreproc/datapreproc", "examples_apps/index", "examples_apps/lightning/data", "examples_apps/lightning/interpret", "examples_apps/lightning/model", "examples_apps/lightning/profiler", "examples_apps/lightning/train", "examples_pipelines/index", "examples_pipelines/kfp/advanced_pipeline", "examples_pipelines/kfp/dist_pipeline", "examples_pipelines/kfp/intro_pipeline", "index", "pipelines", "pipelines/airflow", "pipelines/kfp", "quickstart", "runner", "runner.config", "runtime/overview", "runtime/tracking", "schedulers", "schedulers/aws_batch", "schedulers/aws_sagemaker", "schedulers/docker", "schedulers/gcp_batch", "schedulers/kubernetes", "schedulers/kubernetes_mcad", "schedulers/local", "schedulers/lsf", "schedulers/ray", "schedulers/slurm", "specs", "tracker", "workspace"], "filenames": ["advanced.rst", "app_best_practices.rst", "basics.rst", "cli.rst", "component_best_practices.rst", "components/distributed.rst", "components/interpret.rst", "components/metrics.rst", "components/overview.rst", "components/serve.rst", "components/train.rst", "components/utils.rst", "custom_components.md", "examples_apps/compute_world_size/main.rst", "examples_apps/datapreproc/datapreproc.rst", "examples_apps/index.rst", "examples_apps/lightning/data.rst", "examples_apps/lightning/interpret.rst", "examples_apps/lightning/model.rst", "examples_apps/lightning/profiler.rst", "examples_apps/lightning/train.rst", "examples_pipelines/index.rst", "examples_pipelines/kfp/advanced_pipeline.rst", "examples_pipelines/kfp/dist_pipeline.rst", "examples_pipelines/kfp/intro_pipeline.rst", "index.rst", "pipelines.rst", "pipelines/airflow.md", "pipelines/kfp.rst", "quickstart.md", "runner.rst", "runner.config.rst", "runtime/overview.rst", "runtime/tracking.rst", "schedulers.rst", "schedulers/aws_batch.rst", "schedulers/aws_sagemaker.rst", "schedulers/docker.rst", "schedulers/gcp_batch.rst", "schedulers/kubernetes.rst", "schedulers/kubernetes_mcad.rst", "schedulers/local.rst", "schedulers/lsf.rst", "schedulers/ray.rst", "schedulers/slurm.rst", "specs.rst", "tracker.rst", "workspace.rst"], "titles": ["Advanced Usage", "App Best Practices", "Basic Concepts", "CLI", "Component Best Practices", "Distributed", "Interpret", "Metrics", "Overview", "Serve", "Train", "Utils", "Custom Components", "Compute World Size Example", "Data Preprocessing App Example", "Application Examples", "Trainer Datasets Example", "Model Interpretability Example", "Tiny ImageNet Model", "Simple Logging Profiler", "Trainer Example", "Pipelines Examples", "Advanced KubeFlow Pipelines Example", "Distributed KubeFlow Pipelines Example", "Intro KubeFlow Pipelines Example", "TorchX", "torchx.pipelines", "Airflow", "Kubeflow Pipelines", "Quickstart", "torchx.runner", ".torchxconfig", "Overview", "Tracking", "torchx.schedulers", "AWS Batch", "AWS SageMaker", "Docker", "GCP Batch", "Kubernetes", "Kubernetes-MCAD", "Local", "IBM Spectrum LSF", "Ray", "Slurm", "torchx.specs", "torchx.tracker", "torchx.workspace"], "terms": {"torchx": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 18, 20, 21, 22, 23, 24, 27, 29, 31, 32, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "defin": [0, 2, 3, 8, 16, 22, 23, 24, 27, 29, 31, 34, 36, 40, 45, 46, 47], "plugin": [0, 34, 39, 40], "point": [0, 1, 2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "you": [0, 1, 2, 3, 4, 5, 7, 8, 10, 12, 14, 15, 16, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 38, 39, 40, 42, 43, 45, 47], "configur": [0, 4, 12, 27, 29, 30, 31, 34, 35, 36, 38, 40, 43, 44, 45, 47], "best": [0, 2, 3, 8, 10, 30], "support": [0, 1, 2, 4, 8, 11, 25, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "your": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 18, 22, 23, 24, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 39, 40, 42, 45, 46, 47], "infrastructur": [0, 1, 2, 13, 15, 32], "setup": [0, 3, 4, 13, 15, 16, 17, 20, 22, 35, 45], "most": [0, 1, 2, 3, 22, 45], "done": [0, 12, 27], "through": [0, 2, 3, 8, 27, 28, 29, 31, 45, 47], "python": [0, 2, 3, 4, 5, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 29, 30, 43, 45], "s": [0, 1, 3, 4, 5, 8, 11, 14, 15, 16, 17, 18, 19, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "entri": [0, 29, 30, 36], "requir": [0, 2, 4, 5, 8, 10, 11, 12, 14, 15, 17, 22, 29, 31, 32, 35, 36, 39, 40, 41, 43, 45, 46, 47], "packag": [0, 22, 27, 29, 30, 46], "contain": [0, 2, 3, 5, 8, 9, 11, 12, 15, 21, 22, 23, 24, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 44, 45, 47], "them": [0, 1, 2, 3, 4, 8, 12, 16, 17, 20, 22, 23, 26, 31, 34, 35, 44, 45], "instal": [0, 8, 12, 13, 15, 23, 25, 27, 28, 35, 37, 38, 39, 40, 42, 43, 44, 45], "If": [0, 1, 2, 3, 4, 8, 12, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "don": [0, 1, 2, 8, 10, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "t": [0, 1, 2, 4, 8, 10, 12, 18, 22, 27, 28, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "have": [0, 1, 2, 3, 4, 5, 8, 10, 12, 14, 15, 16, 18, 20, 22, 23, 24, 25, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "we": [0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 16, 18, 22, 23, 24, 27, 29, 34, 39, 44], "recommend": [0, 1, 2, 4, 6, 7, 30, 40], "make": [0, 1, 2, 4, 9, 13, 20, 22, 24, 29, 30, 31, 32, 33, 41, 45, 47], "one": [0, 1, 2, 3, 4, 5, 8, 22, 24, 29, 30, 31, 33, 36, 37, 39, 41, 45], "so": [0, 1, 3, 4, 5, 8, 12, 15, 16, 22, 26, 27, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "can": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 14, 15, 16, 18, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "share": [0, 4, 8, 12, 15, 29, 35, 42], "definit": [0, 1, 2, 4, 8, 9, 12, 15, 21, 23, 24, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "across": [0, 1, 2, 15, 16, 33], "team": 0, "org": [0, 7, 9, 12, 28, 29, 30, 45], "The": [0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 14, 15, 17, 21, 22, 26, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "describ": [0, 1, 2, 24, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "below": [0, 2, 3, 8, 17, 30, 31, 45], "specifi": [0, 2, 3, 4, 5, 7, 8, 11, 14, 16, 17, 19, 20, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "project": [0, 13, 29, 31, 38, 40, 47], "py": [0, 2, 3, 4, 5, 8, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 41, 42, 43, 45], "file": [0, 1, 2, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 22, 23, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "from": [0, 1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "setuptool": 0, "import": [0, 1, 2, 3, 4, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 31, 33, 39, 40, 45, 46], "foobar": [0, 31, 33, 41, 45], "entry_point": [0, 46], "my_schedul": 0, "my": 0, "create_schedul": [0, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "named_resourc": [0, 8, 45], "gpu_x2": 0, "my_modul": [0, 45, 46], "mai": [0, 1, 2, 3, 8, 11, 12, 20, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "implement": [0, 15, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "class": [0, 1, 4, 16, 18, 19, 28, 29, 31, 33, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "interfac": [0, 2, 34, 35, 36, 37, 38, 39, 40, 43, 44, 46, 47], "function": [0, 2, 3, 4, 5, 8, 11, 12, 13, 18, 23, 24, 27, 28, 29, 36, 45], "should": [0, 2, 3, 4, 8, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "follow": [0, 2, 5, 8, 12, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "signatur": 0, "def": [0, 1, 2, 3, 4, 8, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 31, 43, 44, 45], "session_nam": [0, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "str": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "kwarg": [0, 16, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "object": [0, 2, 3, 8, 9, 16, 22, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "return": [0, 2, 4, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 27, 28, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "myschedul": 0, "thi": [0, 1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "ad": [0, 1, 2, 8, 30, 31, 44, 46], "an": [0, 2, 3, 4, 6, 7, 8, 10, 12, 13, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "A": [0, 2, 5, 8, 31, 45, 47], "set": [0, 1, 3, 4, 5, 11, 12, 22, 23, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 44, 45, 47], "predefin": [0, 2], "spec": [0, 2, 3, 4, 5, 8, 9, 12, 22, 23, 24, 25, 28, 29, 30, 31, 34, 35, 37, 39, 40, 41, 42], "ar": [0, 1, 2, 3, 4, 5, 7, 8, 11, 15, 17, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "given": [0, 2, 3, 8, 9, 16, 30, 31, 41, 43, 45], "string": [0, 3, 4, 8, 11, 29, 41, 45, 47], "particularli": 0, "us": [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "when": [0, 1, 2, 3, 4, 5, 7, 8, 22, 25, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cluster": [0, 2, 12, 14, 21, 22, 23, 24, 27, 29, 39, 40, 42, 43, 44, 45, 47], "ha": [0, 2, 4, 5, 8, 10, 11, 13, 15, 17, 19, 20, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "fix": [0, 2], "instanc": [0, 2, 4, 5, 7, 8, 12, 22, 28, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "type": [0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 14, 16, 17, 18, 19, 20, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 42, 43, 44, 45], "For": [0, 1, 2, 4, 5, 7, 8, 10, 15, 16, 17, 20, 22, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "deep": 0, "learn": [0, 1, 2, 3, 10, 20, 27, 40], "train": [0, 4, 5, 7, 8, 13, 15, 16, 17, 18, 19, 20, 22, 25, 27, 29, 31, 33, 36, 40], "kubernet": [0, 2, 3, 5, 12, 14, 17, 22, 23, 25, 28, 29, 30, 31, 34, 35, 37], "aw": [0, 2, 25, 29, 34, 44], "compris": 0, "onli": [0, 2, 3, 5, 8, 20, 22, 23, 24, 28, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "p3": 0, "16xlarg": 0, "64": [0, 16, 17, 29, 45], "vcpu": 0, "8": [0, 5, 12, 29, 33, 41], "gpu": [0, 2, 3, 5, 11, 20, 27, 29, 39, 40, 41, 42, 45], "488gb": 0, "want": [0, 1, 2, 3, 4, 12, 22, 29, 31, 35], "enumer": [0, 17], "shirt": [0, 4], "size": [0, 4, 10, 16, 17, 20, 29, 33, 36, 42, 45], "gpu_x1": 0, "cpu": [0, 2, 3, 4, 5, 11, 20, 22, 29, 31, 39, 40, 45], "1": [0, 1, 2, 3, 4, 5, 8, 11, 12, 14, 16, 17, 18, 20, 22, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "memmb": [0, 2, 3, 5, 11, 22, 29, 39, 40, 45], "61_000": 0, "16": [0, 3, 12, 16, 29], "2": [0, 2, 3, 5, 8, 11, 12, 17, 20, 27, 28, 29, 31, 33, 39, 40, 41, 42, 45], "122_000": 0, "gpu_x3": 0, "32": [0, 20], "4": [0, 2, 5, 8, 12, 29, 31, 33, 39, 40, 41, 45], "244_000": 0, "gpu_x4": 0, "488_000": 0, "To": [0, 1, 2, 3, 8, 12, 14, 15, 16, 20, 22, 23, 24, 27, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "avail": [0, 8, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "need": [0, 1, 2, 3, 4, 7, 8, 13, 16, 22, 23, 24, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "via": [0, 1, 4, 7, 8, 11, 12, 14, 15, 16, 22, 23, 24, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onc": [0, 1, 5, 8, 12, 22, 23, 24, 27, 29, 30, 34, 41], "manner": 0, "get_named_resourc": [0, 4, 45], "122000": 0, "appdef": [0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "role": [0, 2, 3, 4, 5, 8, 12, 23, 24, 28, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "test_app": 0, "imag": [0, 2, 3, 4, 5, 7, 8, 9, 11, 12, 14, 15, 16, 17, 18, 22, 23, 24, 28, 31, 35, 36, 37, 39, 40, 42, 45, 47], "author": [0, 2, 3, 4, 10, 32], "cli": [0, 2, 4, 12, 15, 22, 25, 29, 30, 34, 35, 44, 46], "builtin": [0, 1, 2, 4, 13, 22, 23, 24, 27, 29, 30, 31], "possibl": [0, 1, 2, 4, 24, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "relev": [0, 41], "organ": [0, 8], "wai": [0, 1, 2, 5, 8, 12, 29, 33, 39, 40, 41], "user": [0, 1, 2, 3, 4, 10, 12, 29, 30, 31, 34, 35, 36, 39, 40, 41, 44, 45, 47], "see": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 22, 23, 24, 27, 28, 29, 30, 31, 35, 37, 38, 39, 40, 42, 44, 45, 47], "thei": [0, 2, 4, 5, 8, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "run": [0, 1, 2, 4, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "entrypoint": [0, 2, 3, 5, 8, 10, 11, 12, 23, 24, 30, 34, 41, 44, 45, 46], "my_project": 0, "bar": [0, 2, 3, 8, 11, 14, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "had": 0, "directori": [0, 4, 8, 11, 12, 13, 16, 27, 29, 31, 33, 35, 36, 37, 41, 42, 43, 44, 45, 46, 47], "structur": [0, 1, 4, 8, 34, 45], "project_root": 0, "baz": [0, 31], "And": [0, 2], "singl": [0, 2, 4, 5, 8, 13, 14, 15, 16, 17, 18, 24, 26, 28, 29, 31, 43, 44, 45], "call": [0, 2, 4, 5, 8, 11, 22, 23, 24, 28, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "trainer": [0, 1, 2, 3, 4, 5, 7, 8, 10, 17, 19, 22, 28, 30, 33, 40, 45], "were": [0, 2, 3, 12, 29, 30], "foo": [0, 2, 3, 11, 14, 28, 29, 30, 31, 33, 39, 40, 43, 45, 47], "search": [0, 20, 31], "modul": [0, 2, 3, 4, 5, 8, 11, 13, 15, 16, 17, 20, 26, 28, 29, 30, 32, 33, 45, 46, 47], "all": [0, 1, 3, 8, 15, 17, 20, 21, 22, 23, 24, 27, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "group": [0, 5, 13, 15, 27, 29, 30, 36, 44, 46], "found": [0, 3, 8, 12, 31, 44], "under": [0, 2, 3, 5, 8, 12, 17, 22, 29, 33, 46, 47], "prefix": [0, 3, 8, 29, 31, 36], "In": [0, 2, 3, 8, 22, 31, 33, 40, 47], "case": [0, 1, 2, 3, 4, 8, 22, 31, 40, 47], "would": [0, 1, 2, 4, 8, 12, 22, 29, 30, 31, 34, 41], "those": [0, 2, 28, 29, 31, 34, 41, 47], "__init__": [0, 16, 18, 19], "attempt": [0, 3, 19, 27, 29, 33, 36], "recurs": [0, 11], "namespac": [0, 14, 17, 20, 22, 29, 31, 39, 40], "without": [0, 1, 3, 15, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "howev": [0, 1, 2, 4, 10, 34, 40, 41, 45], "top": [0, 2, 29, 31, 37, 47], "level": [0, 2, 29, 30, 31, 33, 36, 39, 40, 46], "displai": [0, 39], "test": [0, 11, 13, 16, 17, 23, 28, 34, 39, 41, 43], "app": [0, 2, 3, 4, 6, 8, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 22, 23, 24, 25, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "own": [0, 1, 2, 3, 8, 11, 22, 29, 33, 34, 45, 46], "includ": [0, 1, 2, 3, 8, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "its": [0, 2, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "add": [0, 12, 14, 24, 29, 31, 34, 39, 40, 45, 47], "must": [0, 5, 8, 14, 16, 17, 18, 28, 29, 31, 35, 36, 37, 39, 40, 41, 42, 44, 45, 46, 47], "anoth": [0, 22, 33], "back": [0, 12, 33, 35], "e": [0, 2, 5, 8, 11, 15, 17, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "g": [0, 2, 5, 8, 11, 15, 17, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dist": [0, 4, 5, 8, 10, 12, 13, 15, 20, 22, 23, 29, 31, 40, 42, 45], "ddp": [0, 2, 4, 8, 12, 13, 15, 20, 22, 29, 30, 31, 42, 45], "versu": 0, "default": [0, 3, 5, 8, 12, 13, 14, 20, 22, 23, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 45], "two": [0, 2, 5, 12, 15, 22, 29, 33, 36, 39], "registri": [0, 29, 35, 36], "same": [0, 3, 8, 18, 20, 22, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "There": [0, 1, 2, 3], "overlap": 0, "differ": [0, 1, 2, 3, 4, 5, 8, 12, 14, 16, 31, 33, 40, 41, 44], "alias": 0, "concret": 0, "omit": [0, 2, 3, 8, 31], "shorter": 0, "underscor": 0, "_": [0, 14, 16, 18, 29], "_0": 0, "_1": 0, "etc": [0, 3, 15, 29, 34, 42], "exampl": [0, 2, 3, 6, 7, 8, 10, 12, 18, 19, 27, 28, 29, 31, 33, 35, 36, 37, 39, 40, 41, 42, 45, 47], "effect": [0, 4, 5, 17, 20, 29, 30, 35], "expos": [0, 30, 37, 39, 40, 41, 45, 46], "oppos": 0, "vanilla": 0, "11": [0, 12, 29], "3": [0, 3, 5, 8, 12, 16, 17, 18, 20, 23, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "util": [0, 1, 2, 3, 8, 10, 12, 13, 14, 15, 16, 17, 22, 25, 27, 29, 32, 35, 36, 38, 39, 40, 42, 44], "more": [0, 1, 2, 4, 5, 8, 10, 11, 12, 13, 17, 20, 22, 23, 24, 26, 27, 28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "written": [1, 2, 5, 31], "ani": [1, 4, 8, 11, 12, 15, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "languag": 1, "well": [1, 3, 4, 7, 8, 10, 11, 22, 26, 27, 28, 30, 44], "librari": [1, 2, 8, 11, 12, 16, 20, 29, 32, 36, 43], "allow": [1, 2, 3, 4, 5, 11, 12, 16, 22, 24, 26, 29, 31, 33, 34, 41, 44, 45, 46, 47], "maximum": [1, 7, 29, 40, 44], "flexibl": [1, 2, 4], "do": [1, 2, 3, 4, 8, 10, 13, 15, 18, 29, 30, 39, 41, 45], "standard": [1, 20, 22, 24, 29, 45, 47], "start": [1, 7, 8, 9, 12, 19, 25, 27, 29, 30, 31, 35, 36, 38, 41, 45], "provid": [1, 2, 3, 4, 6, 7, 9, 10, 11, 12, 17, 20, 22, 26, 28, 29, 30, 31, 33, 34, 35, 36, 37, 39, 40, 43, 44, 45, 46, 47], "consist": [1, 7, 15, 30, 33, 45], "built": [1, 3, 6, 12, 15, 17, 22, 24, 29, 33, 36, 47], "compon": [1, 6, 7, 9, 10, 11, 13, 15, 16, 20, 21, 23, 24, 26, 27, 28, 29, 30, 31, 34, 35, 38, 39, 40, 41, 42, 44], "applic": [1, 2, 3, 5, 6, 8, 9, 13, 20, 27, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "inform": [1, 2, 4, 8, 10, 29, 30, 34, 35, 36, 39, 45, 46], "how": [1, 2, 3, 4, 6, 7, 8, 10, 12, 13, 15, 22, 23, 24, 28, 29, 30, 31, 35, 36, 40, 45, 46], "handl": [1, 2, 3, 18, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "manag": [1, 9, 22, 29, 36, 41, 45], "fsspec": [1, 2, 7, 9, 11, 14, 15, 16, 17, 18, 20, 22, 29, 46, 47], "pluggabl": [1, 46], "filesystem": [1, 2, 12, 22, 29, 35, 39, 40, 47], "just": [1, 4, 8, 12, 18, 22, 24, 29, 30, 31], "chang": [1, 3, 12, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "input": [1, 2, 17, 29, 33, 36, 45, 46], "output": [1, 11, 14, 17, 19, 20, 22, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "path": [1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 14, 16, 17, 18, 20, 22, 29, 30, 31, 33, 35, 36, 37, 39, 40, 41, 43, 45, 47], "access": [1, 2, 3, 4, 11, 12, 22, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "new": [1, 2, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "environ": [1, 4, 5, 11, 12, 20, 29, 31, 35, 36, 37, 38, 41, 43, 45, 46], "backend": [1, 2, 3, 5, 29, 30, 34, 46], "pytorch": [1, 2, 5, 7, 9, 11, 12, 13, 15, 16, 20, 22, 25, 27, 29, 39, 45], "lightn": [1, 2, 7, 16, 17, 18, 19, 20, 22], "out": [1, 2, 3, 4, 6, 8, 10, 11, 14, 17, 18, 29, 30, 33, 34, 44, 45], "box": [1, 4, 6, 8, 10, 11, 29, 34], "elsewher": 1, "seamless": 1, "integr": [1, 17, 22, 27], "remot": [1, 2, 5, 14, 16, 17, 20, 22, 25, 27, 34, 35, 36, 37, 39, 41, 47], "also": [1, 2, 3, 12, 13, 18, 29, 31, 33, 35, 36, 45], "easier": [1, 4, 9, 32], "transit": [1, 30], "distribut": [1, 2, 8, 10, 13, 15, 20, 21, 24, 25, 27, 28, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "elast": [1, 4, 5, 13, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lot": 1, "depend": [1, 2, 3, 8, 13, 15, 20, 22, 27, 29, 30, 34, 36, 39, 40, 41, 43, 45, 46, 47], "architectur": [1, 20], "which": [1, 2, 3, 5, 7, 8, 13, 15, 16, 19, 20, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "why": [1, 18, 28], "some": [1, 2, 3, 14, 15, 18, 22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "common": [1, 2, 4, 5], "choic": [1, 8], "pure": [1, 2, 4, 8], "light": 1, "ignit": 1, "log": [1, 7, 12, 15, 18, 20, 22, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "monitor": [1, 7, 20, 29, 36], "job": [1, 2, 4, 5, 11, 12, 13, 15, 23, 25, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "standalon": [1, 2, 8, 12, 18, 29, 30], "tensorboard": [1, 3, 7, 12, 19, 20, 22, 45], "sinc": [1, 2, 3, 4, 5, 7, 8, 28, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "nativ": [1, 37], "like": [1, 2, 4, 12, 17, 22, 24, 27, 29, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "s3": [1, 2, 9, 11, 22, 29, 33, 36, 45, 46], "gc": 1, "view": 1, "complex": [1, 4, 24, 26], "about": [1, 2, 5, 10, 27, 30, 33, 40], "while": [1, 2, 12, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "within": [1, 5, 8, 22, 27, 29, 30, 33, 34, 35, 36, 45, 46], "period": [1, 7], "recov": 1, "failur": [1, 45], "restart": [1, 37, 45], "paramet": [1, 2, 3, 4, 5, 7, 8, 9, 11, 18, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "lose": 1, "progress": [1, 7, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "thing": [1, 3, 4, 29, 33], "transfer": [1, 12], "resum": 1, "command": [1, 3, 8, 10, 11, 29, 31, 37, 45, 46], "line": [1, 3, 10, 27, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "argument": [1, 2, 5, 8, 10, 11, 20, 28, 29, 30, 31, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "transient": 1, "error": [1, 3, 11, 12, 13, 29, 41, 45], "continu": [1, 14, 16, 17, 26], "later": [1, 16, 31], "adjust": [1, 45], "rate": [1, 20], "load": [1, 2, 12, 16, 17, 20, 22, 29, 31, 33, 35, 36, 38], "less": [1, 29, 41, 44], "code": [1, 2, 4, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 31, 33, 36, 42, 44, 45, 46, 47], "better": [1, 4], "maintain": [1, 2], "number": [1, 5, 6, 8, 11, 12, 14, 16, 20, 29, 35, 36, 40, 41, 45, 46], "similar": [1, 2, 4, 29, 41, 45], "task": [1, 27, 39, 42], "captum": [1, 6, 15, 17], "analys": 1, "result": [1, 3, 4, 11, 17, 22, 28, 29, 30, 33, 34, 36, 41, 45, 46], "interact": [1, 9, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "jupyt": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29], "notebook": [1, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 47], "commun": [1, 42], "hasn": 1, "format": [1, 4, 5, 8, 19, 28, 29, 30, 31, 33, 34, 44, 45], "here": [1, 3, 5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "coupl": 1, "option": [1, 3, 4, 5, 7, 8, 9, 11, 12, 14, 16, 18, 20, 22, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "might": [1, 4, 29, 36], "ll": [1, 2, 3, 15, 22, 29, 31, 35, 39, 42], "state": [1, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dict": [1, 4, 5, 8, 9, 11, 19, 22, 29, 30, 31, 34, 35, 36, 37, 39, 40, 41, 44, 45, 47], "ckpt": [1, 17, 22], "pt": [1, 9, 18], "modelcheckpoint": [1, 20], "hook": [1, 4], "work": [1, 8, 9, 10, 12, 22, 23, 29, 31, 36, 39, 40, 41, 42, 43, 44, 47], "harder": 1, "reusabl": [1, 4], "creat": [1, 2, 3, 4, 6, 7, 11, 12, 17, 18, 27, 29, 31, 34, 35, 36, 39, 40, 41, 42, 43, 44, 45, 46, 47], "serializ": 1, "optim": [1, 18, 19, 29, 33], "execut": [1, 2, 5, 11, 12, 15, 27, 29, 33, 36, 39, 40, 41, 47], "perform": [1, 5, 9, 13, 15, 29, 45], "reli": [1, 3, 4, 5], "gil": 1, "These": [1, 4, 5, 9, 11, 15, 22, 26, 29, 45, 47], "complet": [1, 2, 4, 27, 30, 35, 36, 37, 39, 43, 44, 45], "self": [1, 3, 16, 18, 19, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "automat": [1, 29, 31, 39, 40, 45, 47], "convert": [1, 2, 8, 16, 17, 23, 24, 28], "document": [1, 5, 11, 15, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "export": [1, 12, 18, 20, 22, 27, 29, 36], "quantiz": 1, "version": [1, 8, 12, 15, 18, 20, 29, 30, 39, 40, 46], "both": [1, 3, 4, 5, 8, 29, 30, 45], "full": [1, 3, 4, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 26, 29, 36, 47], "precis": 1, "consum": [1, 4, 12, 17, 22, 29, 30], "9": [1, 12, 27, 29, 30, 40], "0": [1, 2, 3, 4, 5, 7, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "separ": [1, 11, 13, 22, 24, 29, 37, 42], "It": [1, 2, 3, 4, 13, 15, 16, 17, 18, 20, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quit": [1, 7], "doesn": [1, 22, 28, 37, 39, 40, 41, 45], "widespread": 1, "adopt": 1, "upload": [1, 14, 18, 21, 22, 23, 24, 29, 36, 47], "api": [1, 2, 3, 4, 9, 13, 22, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "reason": [1, 30], "write": [1, 2, 3, 4, 8, 12, 16, 29, 30, 33, 36, 41, 45], "custom": [1, 3, 4, 6, 10, 22, 25, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "logic": [1, 2, 3, 13, 15, 31, 45], "deploi": [1, 9, 40], "build": [1, 2, 12, 29, 35, 36, 37, 39, 47], "server": [1, 3, 5, 7, 40, 45], "typic": [1, 2, 13, 22, 23, 24, 31, 33, 41, 45], "unit": 1, "other": [1, 2, 3, 4, 8, 12, 16, 22, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "unittest": 1, "main": [1, 2, 4, 5, 11, 12, 13, 14, 15, 17, 20, 29, 33, 40, 41], "customapptest": 1, "testcas": 1, "test_main": 1, "none": [1, 3, 4, 5, 9, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "src": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "dst": [1, 5, 11, 29, 35, 37, 39, 40, 42, 45], "asserttru": 1, "high": [2, 30, 31], "behind": 2, "check": [2, 3, 12, 18, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "quickstart": [2, 8, 12, 25, 39], "guid": [2, 8, 12, 25, 29, 40], "workspac": [2, 12, 13, 25, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "patch": [2, 13, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "tool": [2, 3, 9, 44, 45], "submit": [2, 3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "stage": [2, 8, 11, 16, 19, 35, 38, 42], "ml": [2, 13, 25, 29, 36, 46], "platform": [2, 8, 24, 29], "abstract": [2, 22, 34, 41, 45, 46, 47], "uml": 2, "diagram": [2, 8, 30], "simpli": [2, 3, 8, 11, 14, 15, 31, 34, 41], "struct": 2, "actual": [2, 9, 13, 15, 22, 30, 31, 33, 34, 41, 45], "lingo": 2, "jobdefinit": 2, "yaml": [2, 21, 22, 23, 24, 28, 29, 39, 40], "disambigu": 2, "between": [2, 11, 16, 20, 29, 33, 35, 36, 45], "binari": [2, 3, 5, 11, 12, 29, 33, 41], "refer": [2, 3, 8, 15, 34, 43, 44, 45, 47], "understood": [2, 3], "simpl": [2, 3, 4, 8, 10, 12, 13, 14, 15, 17, 18, 29, 33, 34, 45], "echo": [2, 3, 8, 11, 12, 23, 24, 27, 35, 36, 38, 39, 40, 42, 44], "hello": [2, 3, 8, 10, 11, 13, 15, 23, 24, 25, 27, 31, 35, 36, 38, 39, 40, 44], "world": [2, 8, 11, 20, 31], "name": [2, 3, 5, 8, 9, 11, 12, 18, 20, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "bin": [2, 3, 23, 24], "tmp": [2, 3, 11, 12, 14, 17, 20, 22, 27, 28, 29, 33, 41], "arg": [2, 3, 4, 8, 11, 12, 14, 16, 17, 20, 22, 23, 24, 28, 29, 30, 33, 34, 36, 41, 44, 45, 47], "num_replica": [2, 3, 4, 8, 11, 23, 28, 29, 41, 42, 45], "As": [2, 7, 8, 10, 13, 24, 33], "dataclass": 2, "encod": [2, 33, 45], "pass": [2, 3, 4, 5, 11, 12, 16, 20, 28, 29, 30, 31, 33, 37, 39, 40, 41, 44, 45, 47], "few": [2, 3, 8, 29, 33, 34], "varieti": [2, 5], "topolog": [2, 5], "mean": [2, 3, 19, 29, 31, 33, 35, 40], "multipl": [2, 3, 4, 5, 8, 23, 24, 30, 31, 34, 40, 41, 45], "repres": [2, 8, 28, 31, 43, 45], "non": [2, 4, 27, 34, 39, 45], "homogen": [2, 5], "coordin": [2, 5, 29, 33, 45], "mani": [2, 10, 30, 34], "worker": [2, 5, 11, 20, 22, 24, 29, 33, 43, 45], "doc": [2, 4, 8, 12, 13, 23, 28, 29, 35, 38, 39, 40, 42, 45, 47], "what": [2, 8, 22, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "field": [2, 3, 4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "good": 2, "scratch": [2, 4], "rather": [2, 3, 8, 13, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "templet": [2, 5, 8], "think": [2, 8], "conveni": [2, 3, 30, 45], "factori": [2, 4, 8, 28, 34, 43, 45, 46], "method": [2, 4, 8, 17, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "unlik": [2, 31, 45], "map": [2, 5, 8, 28, 30, 31, 33, 45, 47], "granular": 2, "vari": [2, 22], "abov": [2, 3, 8, 14, 27, 45], "readi": [2, 11, 25], "hardcod": 2, "data": [2, 5, 16, 17, 20, 22, 29, 33, 34, 36, 42, 45], "parallel": [2, 5, 15, 22, 29, 35, 45], "style": [2, 4, 5, 13, 15, 29, 31, 45], "node": [2, 3, 4, 5, 8, 15, 20, 22, 24, 28, 29, 30, 35, 39, 40, 41, 45], "jobnam": 2, "nnode": [2, 5, 8, 29], "int": [2, 3, 4, 5, 7, 8, 11, 14, 16, 18, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "script_arg": [2, 5, 8], "single_gpu": 2, "resourc": [2, 3, 5, 11, 12, 22, 23, 28, 29, 30, 34, 35, 36, 39, 40, 41, 44], "1024": [2, 5, 11, 22, 29, 45], "parameter": 2, "up": [2, 4, 8, 22, 27, 28, 29, 30, 31, 33, 38, 40, 41, 45], "effort": [2, 33], "than": [2, 4, 5, 8, 13, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "try": [2, 16, 30, 39], "over": [2, 5, 11, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "gener": [2, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 36, 39, 44, 47], "everyth": [2, 3], "easi": [2, 5, 16, 22, 33], "cheap": 2, "base": [2, 3, 4, 8, 11, 12, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "repetit": 2, "protip": 2, "composit": 2, "achiev": 2, "purpos": [2, 8, 12, 17, 22, 29, 33, 41], "dsl": [2, 23, 24, 28], "section": [2, 8, 31, 45, 46], "understand": [2, 4, 6, 24, 26, 33], "context": [2, 8, 12, 29, 35, 47], "befor": [2, 3, 4, 7, 15, 22, 30, 45, 47], "brows": [2, 3, 8, 27, 29], "fit": [2, 3, 20, 29, 36], "doe": [2, 3, 8, 9, 11, 12, 13, 15, 16, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "exactli": 2, "expect": [2, 5, 34, 38, 40, 41, 44, 45, 47], "launch": [2, 3, 5, 7, 8, 12, 13, 14, 15, 17, 22, 23, 24, 26, 27, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "onto": [2, 3, 14, 31, 34, 35], "app_spec": 2, "programmat": [2, 4, 12, 27, 29, 41, 43, 44, 47], "get_runn": [2, 8, 27, 30, 31], "appspec": [2, 35, 36, 37, 39, 43, 44], "list": [2, 4, 5, 8, 11, 14, 17, 18, 20, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "plug": 2, "workflow": [2, 3, 8, 11, 16, 27, 33], "specif": [2, 3, 5, 6, 24, 26, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "target": [2, 8, 17], "kubeflow": [2, 25, 26], "whatev": 2, "represent": 2, "kfp": [2, 21, 22, 23, 24], "containerop": [2, 24, 28], "accur": 2, "advanc": [2, 13, 21, 23, 24, 25, 29], "especi": [2, 4], "mini": 2, "control": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "flow": 2, "hpo": [2, 11, 19, 46], "sub": [2, 5, 29, 30, 32, 33], "inlin": [2, 24], "exact": [2, 3, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "semant": [2, 8, 30, 34, 47], "dynam": 2, "upstream": [2, 8], "take": [2, 3, 5, 11, 12, 22, 23, 24, 28, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "advantag": [2, 46], "featur": [2, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "tri": [2, 24], "canon": 2, "portabl": 2, "skip": [2, 17, 31, 47], "zero": [2, 17, 45], "echo_torchx": 2, "becaus": [2, 3, 13, 27, 31, 45], "essenti": [2, 3], "anywher": [2, 33], "agnost": [2, 20, 32], "fashion": [2, 22], "layer": [2, 12, 20, 29], "touch": [2, 11, 12], "infra": [2, 29, 36], "NOT": [2, 3, 16, 30, 31, 33, 41, 45], "boto3": [2, 35, 36], "input_path": [2, 14, 22], "session": [2, 30, 45, 46], "client": [2, 3, 12, 22, 23, 24, 34, 35, 36, 38, 39, 40, 41], "s3_input_path": 2, "split": [2, 13, 15], "bucket": [2, 9, 29, 33, 36], "kei": [2, 29, 31, 33, 36, 45, 46], "join": [2, 14, 16, 17, 18, 20, 22], "download_fil": 2, "torch": [2, 4, 5, 8, 13, 15, 16, 17, 18, 20, 29, 40, 45], "rest": 2, "breviti": [2, 3, 8, 31], "implicit": 2, "assumpt": [2, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "One": [2, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "storag": [2, 3, 7, 16, 22, 29, 36, 39, 40, 46], "introduc": 2, "system": [2, 29, 35, 40], "framework": 2, "alreadi": [2, 3, 12, 19, 29, 30, 31], "io": [2, 5, 7, 9, 11, 12, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "hood": [2, 5, 8, 33], "rewritten": 2, "pytorch_lightn": [2, 16, 18, 19, 20], "input_url": 2, "fs": [2, 14, 16, 18, 46, 47], "get_filesystem": 2, "open": [2, 14, 16, 17, 22, 23, 24, 33], "rb": [2, 14], "f": [2, 8, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "now": [2, 3, 12, 31], "compat": [2, 13, 15, 35, 36, 38, 39, 40, 41, 42, 43, 44, 46], "store": [2, 5, 22, 29, 31, 33, 35, 36, 45], "variou": [2, 8, 15, 31, 46], "With": [2, 27, 40], "exist": [2, 4, 7, 12, 14, 24, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "find": [2, 3, 31, 45], "pointer": 2, "ideal": 2, "time": [2, 3, 4, 7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "intend": [2, 24, 30, 33, 43, 45], "But": 2, "proper": 2, "perman": 2, "home": [2, 8, 12, 29, 30, 31], "even": [2, 3, 4, 41], "entir": [2, 45], "oss": [2, 18, 20], "until": [2, 4, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "happen": 2, "matur": 2, "commandlin": [3, 45, 47], "around": [3, 16, 22, 33], "runner": [3, 4, 8, 12, 25, 26, 27, 29, 31, 34, 35, 36, 43], "directli": [3, 4, 7, 8, 17, 22, 27, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "pipelin": [3, 7, 12, 15, 27, 29, 33, 45], "aka": [3, 30], "quickli": [3, 26], "iter": [3, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "incur": 3, "technic": 3, "cognit": 3, "overhead": 3, "deal": [3, 22, 33, 45], "doubt": 3, "help": [3, 4, 8, 12, 14, 15, 17, 20, 22, 29, 34, 38, 40, 45, 47], "consid": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "n": [3, 5, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "config": [3, 13, 29, 30, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "metric": [3, 4, 12, 19, 22, 25, 29, 30, 36, 46], "serv": [3, 12, 22, 25], "torchserv": [3, 9, 12, 18, 22], "get": [3, 8, 12, 16, 20, 22, 25, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "runopt": [3, 29, 30, 31, 34, 38, 40, 45, 47], "local_dock": [3, 12, 29, 31, 37, 45], "log_dir": [3, 12, 27, 29, 31, 41], "dir": [3, 7, 12, 13, 27, 29, 31, 41], "stdout": [3, 5, 11, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stderr": [3, 5, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica": [3, 5, 11, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "local_cwd": [3, 4, 5, 8, 12, 14, 17, 20, 25, 27, 29, 30, 31, 41, 44, 45], "slurm": [3, 4, 25, 34], "subcommand": [3, 8, 31, 46], "either": [3, 4, 8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "sched_nam": 3, "along": [3, 7, 8, 30], "cat": [3, 31], "my_trainer_spec": 3, "my_train": [3, 45], "detail": [3, 4, 20, 42], "chose": [3, 5, 29, 31, 34], "three": 3, "scheduler_arg": [3, 35, 39, 40], "known": [3, 11, 31, 34, 39], "run_opt": [3, 34, 38, 40], "run_config": 3, "each": [3, 4, 5, 11, 19, 20, 21, 22, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "shown": [3, 31, 36], "comma": [3, 29, 31, 37, 45], "delimit": [3, 8, 20, 31, 45], "k": [3, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "v": [3, 15], "pair": [3, 31, 45], "seen": [3, 8], "usag": [3, 8, 12, 27, 29, 30, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "h": [3, 5, 8, 11, 29, 45], "msg": [3, 8, 11, 12, 34, 35, 36, 38, 39, 40, 41, 42, 44, 45], "messag": [3, 8, 11, 27, 29, 34, 45], "show": [3, 8, 22, 27, 28, 29, 31], "exit": [3, 4, 7, 8, 12, 27, 29, 45], "put": [3, 14, 18, 27, 33], "togeth": [3, 23, 24, 44], "2022": 3, "06": [3, 12, 29], "15": [3, 12, 29], "08": 3, "57": [3, 12, 27, 29], "info": [3, 4, 5, 11, 12, 17, 22, 23, 24, 27, 28, 29, 30, 34, 35, 36, 37, 39, 40, 44], "locat": [3, 11, 29, 36, 38, 41, 43, 46], "crls3hcpwjmhc": 3, "By": [3, 41], "block": [3, 4, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "finish": [3, 12, 29, 36, 43], "instead": [3, 4, 5, 12, 27, 29, 30, 33, 34, 39, 43, 45, 47], "print": [3, 10, 12, 13, 14, 16, 17, 18, 20, 22, 23, 24, 27, 28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "url": [3, 9, 11, 14, 33, 34, 45], "form": [3, 8, 44, 45], "scheduler_nam": [3, 31], "job_id": [3, 46], "keep": [3, 4, 31, 34], "note": [3, 4, 5, 8, 11, 12, 14, 15, 20, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "identifi": [3, 18, 29, 34, 35, 45, 47], "regist": [3, 5, 9, 11, 29, 30, 31, 34, 45], "debug": [3, 5, 29, 36], "request": [3, 12, 29, 30, 34, 36, 39, 40, 41, 44, 45], "hello_world": [3, 12, 42, 45], "metadata": [3, 12, 22, 28, 29, 45, 46], "env": [3, 5, 11, 27, 29, 31, 37, 41, 44, 45], "max_retri": [3, 5, 11, 29, 39, 40, 44, 45], "port_map": [3, 28, 45], "capabl": [3, 5, 39, 40, 45, 47], "retry_polici": [3, 45], "retrypolici": [3, 45], "popenrequest": [3, 41], "app_id": [3, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "echo_c944ffb2": 3, "torchx_asmtmyqj": 3, "torchx_kiuk": 3, "role_param": [3, 41], "replicaparam": [3, 41], "torchelastic_error_fil": 3, "json": [3, 33, 34, 43, 45, 46], "role_log_dir": [3, 41], "look": [3, 29, 30, 31, 33, 45], "faux": 3, "local": [3, 5, 7, 12, 14, 16, 17, 20, 22, 25, 29, 30, 33, 34, 36, 37, 44, 47], "subprocess": [3, 18, 41], "popen": [3, 41], "simul": [3, 46], "posix": 3, "process": [3, 5, 13, 14, 15, 17, 22, 29, 34, 41], "nevertheless": 3, "valuabl": 3, "insight": 3, "translat": 3, "particular": [3, 4, 8, 31], "invers": 3, "That": [3, 45], "app_handl": [3, 30, 34, 45], "recreat": [3, 34, 39], "descript": [3, 8, 12, 14, 17, 20, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "alwai": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "100": [3, 12, 22, 30, 41], "wa": [3, 19, 27, 30, 34, 44, 45], "extent": [3, 30], "numer": [3, 33], "factor": 3, "describe_job": 3, "whether": [3, 5, 8, 29, 35, 36, 37, 39, 43, 45], "ignor": [3, 5, 11, 17, 29, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "never": [3, 16, 19], "spot": [3, 29, 36], "filter": [3, 30], "down": [3, 44], "larg": [3, 29, 33, 46], "long": [3, 30], "retain": [3, 29, 36], "archiv": [3, 9, 18], "behalf": [3, 41], "get_log": 3, "obtain": 3, "manual": [3, 4, 27, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "retent": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "properli": [3, 13], "wrapper": [3, 16], "let": [3, 8, 12, 14, 22, 29, 30], "pull": [3, 12, 41, 45], "place": [3, 4, 16, 17, 20, 22, 29, 40, 42, 44], "pattern": [3, 29, 30, 37, 45], "explanatori": 3, "id": [3, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "tail": [3, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "still": [3, 8, 33, 46], "regex": [3, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "except": [3, 16, 27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "role_nam": [3, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "replica_id": [3, 39, 40, 44, 45], "rank": [3, 20, 30, 33], "side": [3, 7], "appli": [3, 28, 29, 31, 37, 39, 40, 45, 47], "veri": [3, 6, 18, 29], "tax": 3, "host": [3, 5, 8, 11, 29, 30, 35, 36, 37, 39, 40, 41, 42, 45], "pleas": [3, 27, 29, 34, 42, 45], "judgment": 3, "status": [3, 45], "further": [3, 29, 34], "a5qvfhe1hyq2w": 3, "succeed": [3, 12, 29, 45], "d796ei2tdtest": 3, "em0iao2m90000": 3, "fail": [3, 12, 30, 37, 39, 45], "ew33oxmdg0123": 3, "design": [4, 25, 26, 27, 45], "deviat": 4, "necessari": [4, 15, 30, 34, 41, 45], "m": [4, 5, 8, 11, 12, 18, 22, 29], "docker": [4, 5, 8, 12, 22, 25, 34, 35, 36, 45, 47], "resolut": [4, 30], "isn": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "folder": [4, 14, 16, 18, 47], "regardless": 4, "img_nam": 4, "img_vers": 4, "reus": [4, 12, 16], "hard": [4, 32], "sort": 4, "manipul": 4, "imposs": 4, "convent": [4, 33], "avoid": [4, 29, 30], "where": [4, 5, 8, 11, 22, 26, 29, 31, 33, 36, 43, 44, 45, 46], "feel": 4, "statement": 4, "prefer": [4, 34, 39, 40, 41, 45], "trainer_test": 4, "_trainer": 4, "trainer_prod": 4, "10": [4, 12, 20, 29, 30, 40, 45], "ref": 4, "overview": [4, 25], "memori": [4, 5, 11, 29, 39, 40, 42, 44, 47], "alloc": [4, 22, 30, 34, 39, 40, 41, 44, 45], "independ": [4, 40], "schedul": [4, 5, 8, 11, 12, 13, 14, 15, 17, 20, 22, 23, 26, 27, 28, 30, 31, 32, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "behavior": [4, 7, 26, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "altern": [4, 30, 46], "merg": 4, "could": [4, 45], "ui": [4, 22, 23, 24, 28, 45, 46], "sidecar": 4, "servic": [4, 7, 22, 29, 34, 39, 40, 46], "re": [4, 22, 25, 27, 34, 39, 40, 45], "comput": [4, 18, 20, 35], "extend": [4, 46], "dictionari": [4, 29, 34, 36], "figur": [4, 18], "static": [4, 22, 42, 45], "pyre": [4, 16, 17, 18], "mypi": 4, "normal": [4, 12, 14, 15, 16, 22, 27, 29], "valid": [4, 11, 13, 15, 22, 30, 33, 34, 41, 45], "componenttestcas": 4, "ensur": [4, 13, 17, 20, 34], "pars": [4, 30, 33, 45], "stricter": 4, "component_test_bas": 4, "methodnam": 4, "runtest": 4, "sourc": [4, 5, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47], "run_compon": [4, 27, 30], "callabl": [4, 16, 41, 45, 46], "scheduler_param": [4, 30], "interv": [4, 30], "float": [4, 7, 8, 11, 18, 19, 20, 30, 31, 33, 45, 47], "timeout": [4, 7, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "appstatu": [4, 30, 45], "helper": [4, 47], "hide": 4, "poll": [4, 7, 30], "reach": [4, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "compplet": 4, "max": [4, 5, 45], "fixtur": 4, "exercis": 4, "teardown": [4, 16], "deconstruct": 4, "after": [4, 8, 17, 22, 29, 31, 36, 45], "function_nam": [4, 30], "fn": [4, 45], "bash": [4, 11, 44], "script": [4, 5, 8, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 29, 42, 44], "core": [4, 24, 45], "gang": [5, 37, 39, 40], "copi": [5, 11, 12, 22, 29, 37, 43, 45], "leverag": [5, 22, 24, 29], "express": [5, 29, 36], "overal": 5, "wise": 5, "wherea": 5, "num": [5, 29, 45], "assum": [5, 8, 17, 22, 29, 33, 35, 39, 40, 41], "x": [5, 18, 29, 42], "j": [5, 15, 20, 22, 29, 31, 42], "1x4": 5, "total": [5, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 30, 41], "2x4": 5, "rdzv_port": [5, 29], "master": [5, 22, 40], "port": [5, 7, 29, 45], "29500": [5, 29], "cfg": [5, 12, 13, 14, 20, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "queue": [5, 14, 23, 28, 29, 31, 35, 39, 42], "autosc": 5, "minimum": [5, 30, 39, 40, 45], "5": [5, 11, 12, 14, 16, 17, 29, 45], "5x8": 5, "compar": 5, "torchelast": [5, 29, 45], "read": [5, 16, 22, 23, 24, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "ghcr": [5, 7, 9, 11, 12, 29, 31], "7": [5, 7, 9, 11, 12, 29], "0dev0": [5, 7, 9, 11, 12, 29], "1x2": [5, 15, 20, 29, 31], "rdzv_backend": [5, 8, 29], "c10d": [5, 8, 29], "mount": [5, 11, 29, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "bool": [5, 8, 9, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "fals": [5, 8, 9, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "tee": [5, 29], "multi": [5, 8, 24, 29, 35, 39, 40, 43], "rendezv": [5, 29], "rendezvous_endpoint": [5, 29], "rank_0_host": [5, 29], "instruct": [5, 12, 15, 17, 29, 39, 40], "free": [5, 29, 30, 34, 41, 45], "random": [5, 16, 17, 20, 29], "mutual": [5, 11, 29, 45], "exclus": [5, 11, 29, 45], "preced": [5, 11, 29, 31, 41], "overrid": [5, 29, 30, 31, 34, 41, 43], "experimentnam": [5, 29], "runnam": [5, 29], "per": [5, 8, 11, 20, 22, 29, 33, 39, 41, 43, 44], "mb": [5, 11, 29, 45], "min_nnod": [5, 29], "nproc_per_nod": [5, 8, 29], "exce": [5, 29], "varibl": [5, 11, 29], "env1": [5, 11, 29, 37], "v1": [5, 8, 11, 12, 29, 37, 39, 40, 45], "env2": [5, 11, 29, 37], "v2": [5, 8, 11, 29, 37, 45], "env3": [5, 11, 29, 37], "v3": [5, 8, 11, 29, 37, 45], "retri": [5, 11, 29, 39, 40, 41, 45], "rank0": [5, 29], "chosen": [5, 29], "ex": [5, 11, 29, 35, 36, 37, 39, 40, 45], "bind": [5, 11, 29, 35, 37, 39, 40, 42, 45], "volum": [5, 11, 29, 35, 36, 37, 39, 40, 45], "readonli": [5, 11, 29, 35, 37, 39, 40, 45], "preset": [5, 29], "flag": [5, 8, 29], "enabl": [5, 12, 29, 36, 38, 40, 44, 46], "std": [5, 29], "stream": [5, 29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "consol": [5, 29], "_torch_debug_flag": 5, "commonli": 5, "variabl": [5, 11, 29, 31, 34, 36, 37, 41, 43, 45], "cuda_launch_block": 5, "nccl_desync_debug": 5, "torch_distributed_debug": 5, "torch_show_cpp_stacktrac": 5, "model": [6, 7, 9, 10, 15, 16, 20, 22, 27, 28, 29, 33, 36, 41, 46], "often": [6, 10, 33, 46], "thu": [6, 39, 40, 45], "analyz": [6, 17], "render": [6, 7], "cloud": [7, 16, 22, 38, 39, 40, 42], "Or": [7, 31], "part": [7, 12, 15, 19, 21, 24, 26, 28, 30, 33, 45], "tensorboardlogg": [7, 20], "tutori": [7, 11, 17], "http": [7, 9, 12, 13, 14, 15, 17, 22, 23, 28, 29, 30, 35, 38, 39, 40, 42, 44, 45, 47], "intermedi": [7, 12, 29], "tensorboard_tutori": 7, "html": [7, 9, 29, 35, 44, 45], "logger": [7, 19, 20], "readthedoc": 7, "en": [7, 23, 28, 42], "stabl": [7, 29], "extens": 7, "logdir": 7, "3600": 7, "6006": 7, "start_on_fil": 7, "exit_on_fil": 7, "termin": [7, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "condit": 7, "caus": [7, 17], "trigger": 7, "correspond": [7, 28, 31, 45], "second": [7, 13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 36, 44], "shutdown": 7, "illustr": 8, "Not": [8, 12, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "current": [8, 11, 12, 22, 28, 29, 31, 36, 39, 41, 43, 44, 45, 46, 47], "collect": [8, 13, 15, 27, 29], "categori": 8, "our": [8, 12, 16, 17, 20, 23, 24, 29], "page": 8, "ve": [8, 22], "being": [8, 29, 47], "downstream": [8, 30], "o": 8, "sure": [8, 22, 30, 31, 45], "rule": [8, 45, 47], "thumb": 8, "familiar": 8, "yourself": 8, "pep": 8, "484": 8, "annot": [8, 34, 45], "primit": [8, 45], "primitive_kei": 8, "primitive_valu": 8, "var_arg": 8, "docstr": [8, 45], "googl": [8, 12, 29, 38, 45], "function_with_pep484_type_annot": 8, "autogener": 8, "pick": [8, 31], "simplifi": 8, "os": [8, 14, 16, 17, 18, 20, 22, 41, 46], "aws_p3": [8, 45], "2xlarg": [8, 45], "basenam": [8, 14], "rdzv_endpoint": 8, "localhost": [8, 12, 15, 25, 41], "5900": 8, "nprocs_per_nod": 8, "save": [8, 14, 16, 17, 18, 20, 27, 29, 33, 36], "torchx_param": 8, "tip": [8, 31, 45], "improv": [8, 45], "posit": [8, 29], "dep": [8, 27], "machin": [8, 10, 39, 40, 45], "bodi": [8, 31], "Then": [8, 31], "reflect": [8, 47], "correctli": [8, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "easiest": 8, "dryrun": [8, 9, 30, 47], "linter": 8, "dist_test": 8, "ident": [8, 29, 41, 46], "fact": 8, "walk": [8, 14, 16, 47], "though": 8, "basic": [8, 12, 25, 34, 38, 40, 47], "invok": [8, 41, 45], "regular": [8, 14, 15, 29, 36], "component_modul": 8, "component_fn": 8, "rel": [8, 12, 13, 29, 30, 36, 41], "d": [8, 12, 29, 31], "drop": [8, 31], "slightli": [8, 29], "syntax": [8, 24], "component_path": [8, 30], "bob": [8, 31], "absolut": [8, 29, 30, 36, 41], "shell": [8, 44], "expans": 8, "cwd": [8, 29, 31, 41], "cd": [8, 15, 31], "know": [8, 22, 29, 45], "straight": 8, "forward": [8, 18], "program": [8, 11, 12, 14, 17, 29], "doubl": [8, 13], "dash": 8, "param_nam": 8, "param1": 8, "argpars": [8, 12, 14, 17, 20, 22], "parser": [8, 12, 14, 17, 20, 22], "summari": [8, 19], "imagin": 8, "comp": 8, "i": [8, 16, 17, 27, 29], "b": [8, 31], "l": 8, "vararg": [8, 31], "true": [8, 12, 14, 17, 18, 20, 22, 27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "k1": 8, "k2": 8, "k3": 8, "c": [8, 10, 11, 29, 31, 43], "henc": [8, 14, 31, 33, 45, 46], "end": [8, 27, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "arg1": 8, "arg2": 8, "arg3": 8, "adapt": [8, 22, 23, 24, 26, 28, 35, 45, 46], "orchestr": [8, 27], "expositori": [8, 13], "quick": [8, 16], "practic": [8, 10], "aim": 9, "infer": [9, 18, 20, 22, 29, 36], "model_path": [9, 22], "management_api": [9, 22], "param": [9, 22, 27, 45], "endpoint": [9, 22, 29, 36], "8081": [9, 22, 45], "root": [9, 14, 15, 16, 31, 41, 43, 45], "loop": 10, "construct": [10, 30, 45, 46], "emb": 10, "limit": [10, 11, 14, 16, 22, 29, 33, 41, 46], "smaller": 10, "sy": [10, 11, 12, 14, 17, 20, 22, 29], "argv": [10, 11, 12, 14, 17, 20, 22, 29], "cp": [11, 42], "meant": 11, "materi": [11, 44], "glue": 11, "oper": [11, 13, 15, 22, 23, 28, 29, 33, 45, 46], "meaning": 11, "sh": [11, 12, 23, 27, 28, 29, 39], "substitut": [11, 45], "destin": 11, "torchx_utils_python": [11, 29], "length": [11, 29], "booth": [11, 12], "x1": 11, "x2": 11, "trial_idx": 11, "tracker_bas": [11, 33], "evalu": [11, 29, 30, 36], "fsspecresulttrack": [11, 33], "outdir": 11, "uri": [11, 29, 33, 36], "tracker": [11, 12, 25, 27, 29, 33], "torchx_utils_binari": 11, "off": [12, 29], "anyth": [12, 29, 41], "writefil": [12, 29], "my_app": [12, 25, 29], "__name__": [12, 13, 14, 17, 20, 33], "__main__": [12, 13, 14, 17, 20, 33], "argumentpars": [12, 14, 17, 20, 22], "add_argu": [12, 14, 17, 20, 22], "person": [12, 31], "greet": 12, "parse_arg": [12, 14, 17, 20, 22], "friendli": 12, "my_compon": [12, 30, 31], "latest": [12, 28, 29, 35, 36, 39, 40, 41, 42, 45], "greeter": 12, "2024": [12, 27, 29], "04": [12, 27, 29], "53": [12, 29], "26": [12, 29], "temporari": [12, 27, 29], "delet": [12, 27, 29], "preserv": [12, 27, 29, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "torchx_mh72sa54": 12, "wait": [12, 27, 29, 30, 34, 36, 43], "27": [12, 29], "tx0hp3qlcb737": 12, "won": [12, 29, 44], "colab": [12, 29], "com": [12, 15, 22, 29, 35, 36, 37, 38, 39, 40, 42, 44, 45, 47], "dockerfil": [12, 29, 47], "0rc1": 12, "34": [12, 29, 40], "driver": [12, 45], "intern": 12, "dockerignor": [12, 47], "2b": 12, "0s": 12, "99b": 12, "4s": 12, "425b": 12, "sha256": [12, 29, 47], "a738949601d82e7f100fa1efeb8dde0c35ce44c66726cf38596f96d78dcd7ad3": 12, "resolv": [12, 29, 30, 41, 45], "6009a622672af862e3a3979ffd58a348f95208a4bc3b6f6cea2efda4e8390203": 12, "0b": 12, "94mb": 12, "1s": 12, "4bbfd2c87b7524455f144a03bf387c88b6d4200e5e0df9139a9d5e79110f89ca": 12, "70mb": 12, "d2e110be24e168b42c1a2ddbc4a476a217b73cccdba69cdcb212b812a88f5726": 12, "857b": 12, "889a7173dcfeb409f9d88054a97ab2445f5a799a823f719a5573365ee3662b6f": 12, "189b": 12, "2s": 12, "20": [12, 29], "97mb": 12, "3s": 12, "25kb": 12, "3dbec59e804974689ff0739216fb012d3e1cd6694632cd3a85b74b572266ec5c": 12, "21kb": 12, "143f801954310499daa44a8499a49797d2f282b5d56be601557ebe6cbf796907": 12, "00gb": 12, "eccbe17c44e1b27c836dddc42f204bde06f73568b50833556b50324146bd43aa": 12, "132b": 12, "extract": [12, 16, 29, 36], "06b5edd6bf524455a7c5a54cb27ced3ecc540414ecf38c24c80ba4368ebc77d": 12, "257b": 12, "d4c7af0d4fa735e6727a24afcea4022492c7f29ac85e31ddf3d385bfbf704f71": 12, "21": [12, 29, 39], "46mb": 12, "39mb": 12, "5s": 12, "f18d016c4ccc1c57c9e34cb119e1e1966287b08aa9b4d52a38b30815a56574b6": 12, "71gb": 12, "6s": 12, "30587ba7fd6bcbd1c883125d84517055b2d7f2d35a13faedbc8b15f94b900cc2": 12, "352b": 12, "7s": 12, "c0ad16d9fa05dbf708784e8aa10d69153465bae391345020be52cbe0a1701932": 12, "92b": 12, "909695be1d5003de345714eec2ca3020a48dd8f407fe918cbd47a8db73d7a233": 12, "341": 12, "29mb": 12, "8s": 12, "101": 12, "74mb": 12, "27mb": 12, "66mb": 12, "48": 12, "23mb": 12, "75": 12, "50mb": 12, "210": 12, "76mb": 12, "102": 12, "131": 12, "07mb": 12, "9s": 12, "191": 12, "89mb": 12, "163": 12, "58mb": 12, "92mb": 12, "325": 12, "06mb": 12, "223": 12, "35mb": 12, "292": 12, "55mb": 12, "253": 12, "287": 12, "31mb": 12, "427": 12, "82mb": 12, "310": 12, "38mb": 12, "385": 12, "88mb": 12, "337": 12, "64mb": 12, "528": 12, "48mb": 12, "482": 12, "34mb": 12, "f119a6d0a466a041afbcb08344ff624b5c5ac5f68b93d33af4827529ea1a6800": 12, "563": 12, "38kb": 12, "88d87059c913e67971846680d4032b75f96f599f8a67062f668fed6471fc2968": 12, "556": 12, "96kb": 12, "579": 12, "86mb": 12, "631": 12, "24mb": 12, "6": [12, 29, 39], "666": 12, "760": 12, "22mb": 12, "734": 12, "00mb": 12, "858": 12, "78mb": 12, "956": 12, "30mb": 12, "849": 12, "06gb": 12, "15gb": 12, "26gb": 12, "36gb": 12, "950": 12, "01mb": 12, "46gb": 12, "56gb": 12, "69gb": 12, "05gb": 12, "12": [12, 29, 41], "16gb": 12, "13": [12, 16, 27, 29, 40], "27gb": 12, "39gb": 12, "50gb": 12, "17": [12, 29], "61gb": 12, "18": [12, 29, 39, 40], "19": [12, 29], "82gb": 12, "94gb": 12, "22": [12, 29, 39, 40], "25": [12, 29], "30": [12, 29, 36, 43], "91": 12, "5411b2b2206f1dc6d9eccb60e0d3ac0f65bdfc9aa2dbb325a18451a61777439b": 12, "55": [12, 29], "disabl": [12, 29, 31, 36, 44], "07": [12, 29], "warn": [12, 27, 29, 34], "fall": 12, "404": 12, "43": 12, "tag": [12, 27, 29, 35, 36, 47], "amp": 12, "fromimag": 12, "deni": 12, "repositori": [12, 29, 35, 36, 37, 39, 40, 47], "39": [12, 27, 29], "login": [12, 47], "step": [12, 22, 24, 25, 45, 46], "gt": [12, 27, 29], "5411b2b2206f": 12, "b196fd38cde5": 12, "label": [12, 17, 29, 36], "f08bf61d5238": 12, "remov": [12, 27, 29, 30, 39], "32818bcb65f3": 12, "successfulli": [12, 29, 30, 45], "32818bcb65f3422eb5378f3bc1dd95f50308e4ef5faf3f31e0329bf2f6b5b345": 12, "origin": [12, 29, 35, 36, 37, 39, 43, 44, 45], "14": [12, 18], "v94dh1tjtmjxlc": 12, "push": [12, 29, 35, 36, 37, 39, 40, 47], "premad": 12, "discov": 12, "spmd": 12, "36": 12, "803d8840e4a9": [12, 29], "41": 12, "bdf53e9b5509": 12, "2b7101fd3b19": 12, "45": 12, "2b432f1ccee8": 12, "2b432f1ccee87051113dd2e0a651be0bb9035661f30a35717df9eea552bbed06": 12, "z0cz2jdx6c51vc": 12, "click": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "download": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 29, 36, 41, 42], "minim": [13, 15], "initi": [13, 15, 16, 20, 29, 36, 43, 45], "all_reduc": [13, 15, 29], "enough": [13, 15], "compute_world_s": [13, 15], "submodul": 13, "e2": [13, 25], "diff": [13, 47], "hydra": 13, "stack": 13, "been": [13, 17, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "seriou": 13, "omegaconf": 13, "dictconfig": 13, "multiprocess": 13, "record": [13, 19, 27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "to_yaml": 13, "throw": [13, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "rais": [13, 19, 27, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "runtimeerror": 13, "compos": [13, 14, 16], "ipython": 13, "pwd": 13, "ab": 13, "cc": 13, "jupyter_notebook": 13, "initialize_config_modul": 13, "config_modul": 13, "config_nam": 13, "minut": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24, 29, 44], "000": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "ipynb": [13, 14, 16, 17, 18, 19, 20, 22, 23, 24], "galleri": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "sphinx": [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24], "torchvis": [14, 15, 16, 18, 29], "reupload": [14, 15], "datapreproc": [14, 17, 22], "cs231n": [14, 22], "stanford": [14, 22], "edu": [14, 22], "tini": [14, 15, 16, 22], "imagenet": [14, 15, 16, 22], "200": [14, 18, 22], "zip": [14, 15, 21, 22], "output_path": [14, 16, 17, 20, 22, 29, 36], "tarfil": [14, 16], "tempfil": [14, 17, 20], "zipfil": 14, "pil": [14, 16], "transform": [14, 16, 22, 26, 28], "dataset": [14, 15, 17, 18], "is_image_fil": [14, 16], "tqdm": [14, 16, 29], "tar": [14, 16, 45], "gz": [14, 16], "download_and_extract_zip_arch": 14, "r": [14, 15, 16, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "zip_ref": 14, "extractal": [14, 16], "temporarydirectori": [14, 17, 20], "tmpdir": [14, 16, 17, 18, 20], "img_root": [14, 16, 41, 45], "splitext": 14, "totensor": [14, 16], "topilimag": [14, 16], "image_fil": [14, 16], "fname": [14, 16], "append": [14, 16, 17, 20, 22, 45], "len": [14, 16, 18], "break": [14, 30], "minit": [14, 16], "2000": [14, 16], "tar_path": [14, 16], "pack": [14, 16], "mode": [14, 16, 29, 30, 36, 39, 40], "w": [14, 33], "arcnam": 14, "rpath": [14, 16, 18], "get_fs_token_path": [14, 16, 18], "assert": [14, 16, 17, 18, 27], "rm": 14, "global": [14, 17, 20, 22], "sphinx_gallery_thumbnail_path": [14, 16, 17, 18, 19, 20, 22, 23, 24], "_static": [14, 16, 17, 18, 19, 20, 22, 23, 24], "img": [14, 16, 17, 18, 19, 20, 22, 23, 24, 41, 47], "png": [14, 16, 17, 18, 19, 20, 22, 23, 24], "demonstr": [15, 33], "themselv": 15, "notic": [15, 35, 38, 42, 47], "pip": [15, 25, 29, 35, 38, 39], "git": [15, 29, 36], "clone": [15, 29, 36], "github": [15, 22, 39, 40, 44, 46], "torchx_vers": 15, "sed": 15, "checkout": [15, 27, 29, 47], "dev": [15, 25, 29, 35, 39, 40, 42, 45], "txt": [15, 29, 43, 45, 46], "repo": [15, 29, 36, 47], "interpret": [15, 18, 22, 25, 41, 45], "sever": [15, 45], "ism": 15, "respect": [15, 34, 47], "profil": [15, 20, 29, 36], "examples_apps_python": 15, "examples_apps_jupyt": 15, "numpi": [16, 17, 29], "pl": [16, 18, 20], "dataload": [16, 17], "imagefoldersamplesdataset": 16, "imagefold": 16, "sampl": [16, 46], "num_sampl": [16, 20], "super": [16, 18, 19], "__len__": 16, "fixm": [16, 17, 18, 31], "attribut": [16, 17, 43], "test_d": 16, "train_d": 16, "val_d": 16, "tinyimagenetdatamodul": [16, 17, 20], "lightningdatamodul": 16, "data_dir": [16, 17, 20], "batch_siz": [16, 17, 20], "loader": 16, "img_transform": 16, "val": [16, 18, 20], "train_dataload": 16, "val_dataload": 16, "test_dataload": [16, 17], "download_data": [16, 17, 20], "remote_path": [16, 18], "unextract": 16, "isdir": 16, "data_path": [16, 17, 20, 22], "create_random_data": [16, 17, 20], "num_imag": 16, "250": 16, "fill": [16, 31, 34], "randomli": 16, "64x64": 16, "preprocess": [16, 17, 22], "train_path": 16, "class1_train_path": 16, "class1": 16, "class2_train_path": 16, "class2": 16, "val_path": 16, "class1_val_path": 16, "class2_val_path": 16, "test_path": 16, "class1_test_path": 16, "class2_test_path": 16, "makedir": [16, 17, 20], "fileexistserror": 16, "rang": [16, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pixel": 16, "rand": 16, "255": 16, "im": 16, "fromarrai": 16, "astyp": 16, "uint8": 16, "rgb": 16, "rand_image_": 16, "jpeg": 16, "process_imag": 16, "lib": [16, 18, 19, 29, 45], "seri": [17, 29, 36], "gradient": [17, 22], "overlai": [17, 29, 47], "ai": 17, "cifar_torchvision_interpret": 17, "load_path": [17, 20, 22], "last": [17, 22, 45], "viewer": [17, 28], "visual": 17, "equal": [17, 45], "benefit": 17, "swap": 17, "itertool": 17, "tinyimagenetmodel": [17, 18, 20], "otherwis": [17, 20, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "crash": [17, 45], "np": 17, "attr": 17, "integratedgradi": 17, "viz": 17, "checkpoint": [17, 20, 22, 29, 36], "weight": [17, 33], "analysi": 17, "convert_to_rgb": 17, "arr": 17, "tensor": [17, 18, 29], "ndarrai": 17, "24": [17, 29, 36, 40, 45], "arrai": 17, "squeez": 17, "swapax": 17, "shape": 17, "invalid": [17, 30, 45], "produc": [17, 22, 28, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "init": [17, 20], "load_from_checkpoint": [17, 20], "checkpoint_path": [17, 20], "els": [17, 20, 22], "ig": 17, "first": [17, 22, 23, 24, 29, 30, 34, 45, 46, 47], "islic": 17, "unsqueez": 17, "dim": 17, "zero_grad": 17, "attr_ig": 17, "delta": 17, "baselin": 17, "return_convergence_delta": 17, "count_nonzero": 17, "toi": [17, 18], "sometim": 17, "due": [17, 39, 41], "fig": 17, "axi": 17, "visualize_image_attr": 17, "blended_heat_map": 17, "sign": [17, 29, 37], "show_colorbar": 17, "titl": 17, "out_path": [17, 18], "ig_": 17, "heatmap": 17, "wb": 17, "savefig": 17, "regress": 18, "tupl": [18, 28, 35, 36, 39, 40, 44, 45, 47], "jit": 18, "nn": 18, "torchmetr": 18, "accuraci": [18, 30, 33], "resnet": [18, 29], "basicblock": [18, 29], "lightningmodul": 18, "linear": [18, 29], "net": 18, "layer_s": 18, "lr": [18, 20], "001": 18, "small": [18, 29, 39, 40], "tweak": 18, "match": [18, 31, 44], "tinyimagenet": 18, "avgpool": 18, "adaptiveavgpool2d": 18, "fc": [18, 29], "out_featur": [18, 29], "train_acc": [18, 20], "val_acc": [18, 20], "training_step": 18, "batch": [18, 20, 25, 34], "batch_idx": 18, "_step": 18, "validation_step": 18, "val_batch": 18, "step_nam": 18, "acc_metr": 18, "y": 18, "y_pred": 18, "loss": 18, "cross_entropi": 18, "_loss": 18, "_acc": 18, "todo": 18, "aivan": 18, "fb": 18, "cannot": [18, 27, 41, 42, 45], "configure_optim": 18, "adamw": 18, "export_inference_model": [18, 20], "torchscript": 18, "serial": [18, 33, 43], "dure": [18, 29, 36, 41, 45], "jite": 18, "jit_path": 18, "model_jit": 18, "model_nam": [18, 22], "tiny_image_net": [18, 22], "mar_path": 18, "mar": [18, 22], "handler": 18, "durat": [19, 29, 36], "ax": 19, "lightningloggerbas": 19, "baseprofil": 19, "simpleloggingprofil": [19, 20], "action": [19, 20, 30], "report": [19, 30], "duration_": 19, "event": [19, 29, 44], "current_act": 19, "action_nam": 19, "valueerror": [19, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "monoton": 19, "stop": [19, 30, 45], "end_tim": 19, "start_tim": 19, "pop": 19, "log_metr": 19, "runtim": [20, 28, 29, 31, 32, 33, 37, 39, 40, 41, 45, 47], "epoch": [20, 22], "log_path": [20, 22], "skip_export": 20, "1x1": [20, 22], "addit": [20, 29, 31, 36, 40, 41, 45], "callback": 20, "store_tru": 20, "narg": 20, "mlp": 20, "hidden": 20, "neural": 20, "get_model_checkpoint": 20, "behav": [20, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "deadlock": 20, "train_loss": 20, "dirpath": [20, 43], "save_last": 20, "checkpoint_callback": 20, "save_dir": 20, "lightning_log": [20, 22], "num_nod": 20, "group_world_s": 20, "acceler": 20, "cuda": 20, "is_avail": 20, "devic": [20, 29, 35, 37, 39, 40, 41, 45], "local_world_s": 20, "strategi": 20, "max_epoch": 20, "acc": 20, "intro": 21, "examples_pipelines_python": 21, "examples_pipelines_jupyt": 21, "someth": [22, 26], "dist_ddp": 22, "utils_copi": 22, "utils_python": 22, "container_from_app": [22, 24, 28], "modifi": [22, 45, 46], "rebuild": [22, 47], "awai": 22, "blob": [22, 33, 40], "readm": [22, 29], "md": [22, 40], "svc": 22, "somewher": 22, "copy_app": 22, "next": 22, "raw": [22, 30, 39, 45], "previou": [22, 27, 45, 46], "ahead": 22, "fulli": [22, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "processed_data_path": 22, "datapreproc_app": 22, "fast": [22, 25], "autom": 22, "__file__": 22, "dirnam": 22, "logs_path": 22, "models_path": 22, "trainer_app": 22, "3000": 22, "ui_metadata": [22, 28], "serve_app": 22, "initial_work": 22, "interpret_path": 22, "interpret_app": 22, "track": [22, 25, 29, 39, 44, 46], "set_tti": 22, "respons": [22, 34, 45], "compil": [22, 23, 24, 28], "pipeline_func": [22, 23, 24, 28], "package_path": [22, 23, 24, 28], "rt": [22, 23, 24], "advanced_pipelin": 22, "resource_from_app": [23, 28], "volcano": [23, 28, 29, 31, 39], "echo_app": [23, 24], "alpin": [23, 24, 35, 36, 39, 40, 42], "instanti": [23, 24, 34, 41], "echo_contain": [23, 24], "baseop": 23, "sdk": [23, 24, 28, 29], "chain": [23, 24, 33], "dist_pipelin": 23, "introductori": 24, "cross": 24, "mechan": [24, 37, 45, 46], "wherev": 24, "component_from_app": [24, 28], "convers": 24, "intro_pipelin": 24, "univers": 25, "launcher": 25, "research": 25, "product": 25, "concept": [25, 29, 39, 40], "torchxconfig": [25, 46], "mcad": [25, 29, 34], "rai": [25, 29, 34], "sagemak": [25, 29, 34], "ibm": [25, 34], "spectrum": [25, 34], "lsf": [25, 29, 34], "gcp": [25, 29, 34], "airflow": [25, 26], "deploy": [26, 40, 41], "assembl": 26, "easili": 27, "No": 27, "special": 27, "datetim": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "pendulum": 27, "dagrunst": 27, "taskinstancest": 27, "dagruntyp": 27, "dag": 27, "decor": 27, "data_interval_start": 27, "2021": [27, 29], "tz": 27, "utc": 27, "data_interval_end": 27, "timedelta": 27, "dai": [27, 29, 44], "virtualenv": [27, 44], "task_id": 27, "hello_torchx": 27, "run_torchx": 27, "statu": [27, 29, 30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 47], "wait_interv": [27, 30], "raise_for_statu": [27, 45], "didn": 27, "succe": 27, "final": [27, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47], "log_lin": [27, 30], "make_uniqu": 27, "dag_id": 27, "example_python_oper": 27, "schedule_interv": 27, "start_dat": 27, "catchup": 27, "run_job": 27, "dagrun": 27, "create_dagrun": 27, "execution_d": 27, "data_interv": 27, "run_typ": 27, "ti": 27, "get_task_inst": 27, "get_task": 27, "ignore_ti_st": 27, "success": 27, "ipykernel_4218": 27, "454499020": 27, "removedinairflow3warn": 27, "deprec": [27, 30, 45], "futur": [27, 30, 45, 46], "releas": [27, 39, 40, 46], "15t15": 27, "49": 27, "440": 27, "0000": 27, "taskinst": 27, "2073": 27, "met": 27, "dep_context": 27, "requeueabl": 27, "lt": [27, 29], "d3ghzhj3zbttmd": 27, "manual__2021": 27, "09": [27, 29], "13t00": 27, "00": [27, 29], "446": 27, "447": [27, 29], "2303": 27, "2385": 27, "queued_dur": 27, "457": 27, "2327": 27, "_pythondecoratedoper": 27, "701": 27, "2644": 27, "var": [27, 29, 34, 41], "airflow_ctx_dag_own": 27, "airflow_ctx_dag_id": 27, "airflow_ctx_task_id": 27, "airflow_ctx_execution_d": 27, "airflow_ctx_try_numb": 27, "airflow_ctx_dag_run_id": 27, "703": 27, "430": 27, "endgroup": 27, "50": [27, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "375": 27, "72": 27, "379": 27, "local_schedul": [27, 34, 41], "771": 27, "380": 27, "777": 27, "torchx_p5ojxbdz": 27, "486": 27, "237": 27, "valu": [27, 29, 31, 33, 36, 39, 40, 41, 45, 46, 47], "487": 27, "441": 27, "post": 27, "491": 27, "1205": 27, "mark": 27, "20210913t000000": 27, "20240415t155749": 27, "end_dat": 27, "20240415t155750": 27, "goe": 27, "unspecifi": 28, "app_def": 28, "service_account": [28, 29, 39, 40], "resourceop": 28, "containerfactori": 28, "equival": [28, 29, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "load_component_from_": 28, "www": [28, 42], "component_spec_from_app": 28, "notabl": 28, "protocol": 28, "log_level": 29, "cancel": [29, 30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "job_nam": [29, 36], "overwrit": [29, 30, 41], "extra": [29, 30, 45, 47], "itself": 29, "54": 29, "torchx_fj9jio7k": 29, "nn99tqg2cmn6r": 29, "56": 29, "58": 29, "01": 29, "695d2846818e": 29, "2fa7d3bbf02a": 29, "05": 29, "95856f7d6c9a": 29, "95856f7d6c9a9cb1d83b8e6a398b164800d994de4ffbc76c8064e48c6a48f27": 29, "mpzvzd0kr3lf9c": 29, "relat": [29, 45], "interest": 29, "dist_app": [29, 42], "init_process_group": 29, "gloo": [29, 42], "am": 29, "get_rank": 29, "get_world_s": 29, "2x2": [29, 42], "562e27958b97": 29, "b2fae9f9d2a": 29, "86ae5bbe4c84": 29, "86ae5bbe4c844c347143f95b42531c1e4411ad0c5e00a6cc6ef74d6a7508e2ad": 29, "omp_num_thread": 29, "overload": 29, "tune": [29, 33], "465": 29, "xgb0tsflbdb5kc": 29, "aws_batch": [29, 35, 36], "basi": [29, 39], "daemon": [29, 47], "image_repo": [29, 35, 36, 37, 39, 40], "partit": [29, 44], "copy_env": [29, 37], "quiet": [29, 35, 36, 37, 39], "glob": [29, 37], "foo_": [29, 37], "eiher": [29, 37], "semicolon": [29, 37], "ones": [29, 31, 37, 45], "suppress": [29, 35, 36, 37, 39], "verbos": [29, 35, 36, 37, 39], "prepend_cwd": [29, 41], "auto_set_cuda_visible_devic": [29, 41], "prepend": [29, 41], "cuda_available_devic": [29, 41], "assign": [29, 41, 45], "noth": [29, 30, 31, 41], "count": [29, 39, 40, 41], "comment": [29, 44], "constraint": [29, 44], "mail": [29, 44], "job_dir": [29, 44, 47], "hour": [29, 44], "torchxslurmjobdir": [29, 44], "priority_class": [29, 39], "account": [29, 39, 40, 44], "pod": [29, 39, 40], "priorityclass": [29, 39, 40], "kubernetes_mcad": [29, 40], "prioriti": [29, 35, 40, 45], "priority_class_nam": [29, 40], "image_secret": [29, 40], "coscheduler_nam": [29, 40], "network": [29, 36, 40, 41, 42], "higher": [29, 33, 35, 40], "integ": [29, 40], "admin": [29, 40], "openshift": [29, 40], "secret": [29, 40], "privat": [29, 36, 40, 42], "co": [29, 40], "beyond": [29, 33, 40], "privileg": [29, 35, 39, 40], "share_id": [29, 35], "job_role_arn": [29, 35], "execution_role_arn": [29, 35], "usernam": [29, 35, 36], "getpass": [29, 35, 36], "getus": [29, 35, 36], "elev": [29, 35], "permiss": [29, 35, 37, 45], "polici": [29, 35, 39, 41, 45], "9999": [29, 35], "amazon": [29, 35, 36, 45], "arn": [29, 35, 36], "iam": [29, 35, 36], "ec": [29, 35], "agent": [29, 35], "xdg": 29, "aws_sagemak": [29, 36], "instance_typ": [29, 36], "instance_count": [29, 36], "keep_alive_period_in_second": [29, 36], "volume_s": [29, 36], "volume_kms_kei": [29, 36], "max_run": [29, 36], "input_mod": [29, 36], "output_kms_kei": [29, 36], "base_job_nam": [29, 36], "subnet": [29, 36], "security_group_id": [29, 36], "model_uri": [29, 36], "model_channel_nam": [29, 36], "metric_definit": [29, 36], "encrypt_inter_container_traff": [29, 36], "use_spot_inst": [29, 36], "max_wait": [29, 36], "checkpoint_s3_uri": [29, 36], "checkpoint_local_path": [29, 36], "debugger_hook_config": [29, 36], "enable_sagemaker_metr": [29, 36], "enable_network_isol": [29, 36], "disable_profil": [29, 36], "max_retry_attempt": [29, 36], "source_dir": [29, 36], "git_config": [29, 36], "hyperparamet": [29, 36], "container_log_level": [29, 36], "code_loc": [29, 36], "training_repository_access_mod": [29, 36], "training_repository_credentials_provider_arn": [29, 36], "disable_output_compress": [29, 36], "enable_infra_check": [29, 36], "artifact": [29, 36, 46, 47], "ec2": [29, 35, 36, 45], "c4": [29, 36], "xlarg": [29, 36], "instance_group": [29, 36], "warm": [29, 36], "pool": [29, 36], "subsequ": [29, 36], "gb": [29, 33, 36], "km": [29, 36], "encrypt": [29, 36], "eb": [29, 36], "attach": [29, 36, 40, 47], "60": [29, 36], "algorithm": [29, 36], "estim": [29, 36], "timestamp": [29, 36], "vpc": [29, 36], "secur": [29, 36], "pre": [29, 31, 36], "channel": [29, 36], "traffic": [29, 36], "persist": [29, 36, 39, 40, 45, 46], "emit": [29, 36], "debugg": [29, 36], "unless": [29, 36, 43], "region": [29, 36], "isol": [29, 36, 44], "move": [29, 36, 45], "asid": [29, 36], "branch": [29, 36, 40], "commit": [29, 36], "2fa_en": [29, 36], "password": [29, 36], "token": [29, 36], "lambda": [29, 36], "credenti": [29, 35, 36, 38], "authent": [29, 35, 36, 38, 47], "compress": [29, 36], "gcp_batch": [29, 38], "central1": [29, 38], "cluster_config_fil": [29, 43], "cluster_nam": [29, 43], "dashboard_address": [29, 43], "127": [29, 43], "8265": [29, 43], "dashboard": [29, 43], "address": [29, 43], "against": [29, 43, 45], "lsf_queue": [29, 42], "jobdir": [29, 42], "container_workdir": [29, 42], "host_network": [29, 42], "shm_size": [29, 42], "64m": [29, 42], "shm": [29, 42], "timm_app": 29, "timm": 29, "resnet18": 29, "cuda11": 29, "cudnn8": 29, "newli": [29, 47], "28": 29, "29": 29, "59": 29, "c3f17e5ac010": 29, "bb20453f5871": 29, "py3": 29, "whl": 29, "satisfi": 29, "opt": [29, 45], "conda": [29, 44], "python3": 29, "site": 29, "pyyaml": 29, "huggingfac": 29, "hub": 29, "huggingface_hub": 29, "268": 29, "kb": 29, "safetensor": 29, "cp37": 29, "cp37m": 29, "manylinux_2_17_x86_64": 29, "manylinux2014_x86_64": 29, "typing_extens": 29, "filelock": 29, "42": 29, "61": 29, "2023": 29, "143": 29, "importlib": 29, "importlib_metadata": 29, "zipp": 29, "certifi": 29, "2017": 29, "urllib3": 29, "idna": 29, "chardet": 29, "pillow": 29, "f4c69858960a": 29, "c12ecbe590a": 29, "438e4363325c": 29, "02": 29, "feadda048372": 29, "feadda04837239fc5d424a171dc886aedd3cf45942103decea3775796c4dc8f": 29, "conv1": 29, "conv2d": 29, "kernel_s": 29, "stride": 29, "pad": 29, "bia": 29, "bn1": 29, "batchnorm2d": 29, "ep": 29, "1e": 29, "momentum": 29, "affin": 29, "track_running_stat": 29, "act1": 29, "relu": 29, "inplac": 29, "maxpool": 29, "maxpool2d": 29, "dilat": 29, "ceil_mod": 29, "layer1": 29, "sequenti": [29, 33], "drop_block": 29, "aa": 29, "conv2": 29, "bn2": 29, "act2": 29, "layer2": 29, "128": 29, "downsampl": 29, "layer3": 29, "256": 29, "layer4": 29, "512": 29, "global_pool": 29, "selectadaptivepool2d": 29, "pool_typ": 29, "avg": 29, "flatten": 29, "start_dim": 29, "end_dim": 29, "in_featur": 29, "1000": 29, "kg0h7f7c52xbfd": 29, "runcfg": [30, 31, 41], "component_default": 30, "close": [30, 34, 41], "human": 30, "readabl": 30, "constructor": [30, 34], "scheduler_factori": 30, "schedulerfactori": [30, 34], "individu": [30, 42], "act": 30, "upon": [30, 45], "cach": 30, "direct": 30, "soon": 30, "interrupt": 30, "clean": 30, "deem": [30, 34, 41], "associ": [30, 45], "undefin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "ok": 30, "reconstruct": 30, "much": 30, "anymor": 30, "union": [30, 31, 35, 42, 45, 47], "parent_run_id": 30, "appdryruninfo": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dry": [30, 34], "pretti": 30, "dryrun_info": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "dryrun_compon": 30, "component_arg": 30, "Will": 30, "listapprespons": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prototyp": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47], "phase": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "subject": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46], "should_tail": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "honor": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "guarante": [30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "highli": 30, "log_it": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "discourag": 30, "partial": [30, 35, 36, 37, 39, 41, 43, 44], "purg": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "whitespac": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "charact": 30, "newlin": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "configvalu": [30, 45, 46], "present": [30, 31, 43, 45, 47], "anti": 30, "experi": [30, 46], "matches_regex": 30, "model_accuraci": 30, "parse_accuraci": 30, "experiment_nam": 30, "th": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "fetch": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "left": 30, "empti": [30, 31, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "cursor": [30, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "begin": 30, "unknownappexcept": 30, "order": [30, 31, 40, 45], "low": [30, 31], "file_path": 30, "componentvalidationexcept": 30, "componentnotfoundexcept": 30, "sparingli": 30, "abus": 30, "lead": 30, "go": 30, "complianc": 30, "term": 30, "unblock": 30, "certain": [30, 31, 41, 47], "short": 30, "scheduler_backend": [30, 34], "scheduler_run_opt": 30, "local_runopt": 30, "past": 30, "replac": [30, 31, 45, 47], "indefinit": 30, "app_statu": 30, "is_termin": 30, "sleep": [30, 33], "beta": [31, 47], "ini": 31, "sensibl": 31, "placehold": 31, "happi": 31, "redundantli": 31, "decid": 31, "date": 31, "leav": 31, "stale": 31, "ls": 31, "enviorn": 31, "torchx_config": 31, "hierarchi": 31, "overlaid": [31, 47], "malform": 31, "unrecogn": 31, "2x8": 31, "overwritten": [31, 33], "cmd": [31, 42, 44, 45], "addition": [31, 46], "some_workspac": 31, "outmost": 31, "hold": [31, 41, 44, 45], "dir_1": 31, "dir_2": 31, "textio": 31, "configfil": 31, "dump": [31, 33, 43], "required_onli": 31, "templat": [31, 45], "find_config": 31, "filepath": 31, "element": [31, 45], "get_config": 31, "barr": 31, "bazz": 31, "fooo": 31, "load_sect": 31, "content": [31, 44, 47], "categor": 32, "topic": [32, 42], "experiment": [33, 46], "AT": [33, 46], "risk": [33, 46], "TO": [33, 46], "keyword": 33, "intention": 33, "constrain": [33, 39, 40], "hundr": 33, "nor": 33, "quantiti": [33, 45], "hyper": 33, "suppos": 33, "app1": 33, "app2": 33, "feed": 33, "seem": 33, "worri": 33, "pseudo": 33, "do_someth": 33, "s3client": 33, "utf": 33, "output_fil": 33, "input_fil": 33, "decod": 33, "do_something_els": 33, "app1_out": 33, "app1_accuraci": 33, "l2norm": 33, "liter": [33, 45], "1kb": 33, "slash": 33, "statist": 33, "sem": 33, "uniqu": [33, 34, 43, 44, 45], "scope": 33, "central": 33, "entiti": 33, "strong": 33, "made": [33, 45], "similarli": 33, "consecut": 33, "BE": 33, "min": 33, "strongli": 33, "advis": 33, "concaten": 33, "experiment_id": 33, "trial_numb": 33, "123": 33, "attempt_1": 33, "233": 33, "outsid": 33, "get_scheduler_factori": 34, "get_default_scheduler_nam": 34, "default_scheduler_nam": 34, "abc": 34, "abstractmethod": 34, "kill": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "idempot": 34, "thread": [34, 41, 45], "safe": 34, "underli": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "longer": [34, 41], "wrap": [34, 40, 41, 46], "describeapprespons": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "qualifi": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "constitut": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "caller": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "prior": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "Is": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "twice": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "lost": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "live": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "arbitrari": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], "stopiter": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "exhaust": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "stuck": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "eventu": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "__getitem__": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "seek": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "50th": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "carriag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "select": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "combin": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "notimplementederror": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "encourag": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "trivial": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "submit_dryrun": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45], "mostli": 34, "regard": 34, "not_set": 34, "appstat": [34, 40, 45], "unsubmit": [34, 45], "num_restart": [34, 45], "structured_error_msg": [34, 45], "ui_url": [34, 45], "roles_status": 34, "rolestatu": [34, 45], "suffici": 34, "recret": 34, "member": 34, "accessor": [34, 45], "popul": [34, 45], "userguid": 35, "batch_getstart": 35, "ecr": 35, "amazonecr": 35, "aws_batch_schedul": 35, "awsbatchschedul": 35, "log_client": 35, "docker_cli": [35, 36, 39, 40, 47], "dockercli": [35, 36, 39, 40, 47], "dockerworkspacemixin": [35, 36, 37, 39, 40, 47], "awsbatchopt": 35, "torchx_us": [35, 36, 39, 44], "1234": [35, 36, 39, 44], "ef": 35, "infiniband": 35, "uverbs0": 35, "perm": [35, 39, 40, 45], "rwm": [35, 37, 39, 40, 45], "parse_mount": [35, 37, 39, 40, 45], "fsx": 35, "repost": 35, "knowledg": 35, "center": 35, "lustr": 35, "fabric": 35, "efa": 35, "batchjob": 35, "nonetyp": [35, 42], "job_def": [35, 36, 38], "images_to_push": [35, 36, 39, 40, 47], "aws_sagemaker_schedul": 36, "awssagemakerschedul": 36, "awssagemakeropt": 36, "sagemakerschedul": 36, "awssagemakerjob": 36, "requri": 36, "docker_schedul": 37, "dockerschedul": 37, "dockeropt": 37, "closest": 37, "dockerjob": 37, "dockercontain": 37, "has_dock": 37, "gcp_batch_schedul": 38, "gcpbatchschedul": 38, "gcpbatchopt": 38, "app_id1234": 38, "gcloud": 38, "gcpbatchjob": 38, "batch_v1": 38, "upgrad": 39, "kubectl": 39, "githubusercont": 39, "develop": 39, "kubernetes_schedul": 39, "kubernetesschedul": 39, "apicli": [39, 40], "kubernetesopt": 39, "confirm": [39, 40], "issu": [39, 40, 42, 44], "120": 39, "occur": [39, 45], "bug": 39, "1651": 39, "extern": [39, 40], "hostpath": [39, 40], "persistentvolumeclaim": [39, 40], "claim": [39, 40], "16000": [39, 40], "reserv": [39, 40], "whole": [39, 40], "reduc": [39, 40], "amount": [39, 40], "kubernetesjob": 39, "app_to_resourc": [39, 40], "macro": 39, "pod_label": [39, 40], "role_idx": [39, 40], "role_to_pod": [39, 40], "v1pod": [39, 40], "sanitize_for_seri": [39, 40], "obj": [39, 40, 45], "dispatch": 40, "appwrapp": 40, "codeflar": 40, "kubernetes_mcad_schedul": 40, "kubernetesmcadschedul": 40, "kubernetesmcadopt": 40, "among": 40, "e790d7f": 40, "your_image_repo": 40, "secondari": 40, "coschedul": 40, "podgroup": 40, "sig": 40, "tree": 40, "pkg": 40, "crd": 40, "k8": 40, "io_podgroup": 40, "At": 40, "guidanc": 40, "evict": [40, 45], "preemption": [40, 45], "multu": 40, "k8snetworkplumbingwg": 40, "cni": 40, "kubernetesmcadjob": 40, "mcad_svc": 40, "svc_name": 40, "service_port": 40, "v1servic": 40, "get_appwrapper_statu": 40, "get_port_for_servic": 40, "get_role_inform": 40, "generic_item": 40, "get_tasks_status_descript": 40, "unique_app_id": 40, "localschedul": 41, "image_provider_class": 41, "localopt": 41, "imageprovid": 41, "cache_s": 41, "extra_path": 41, "properti": [41, 45, 46], "enforc": 41, "orphan": 41, "cleanup": 41, "receiv": 41, "sigterm": 41, "sigint": 41, "spawn": 41, "faster": 41, "softwar": [41, 45], "cuda_visible_devic": 41, "accord": [41, 45], "replica_0": 41, "replica_1": 41, "role_0": 41, "role_1": 41, "replica_2": 41, "localhostschedul": 41, "real": 41, "op": 41, "fetch_rol": 41, "updat": [41, 47], "compli": [41, 45], "deleg": 41, "get_cwd": 41, "child": [41, 45], "get_entrypoint": 41, "get_replica_param": 41, "holder": 41, "cwdimageprovid": 41, "localdirectoryimageprovid": 41, "getcwd": 41, "conjunct": 41, "not_exist": 41, "image_typ": 41, "childprocess": 41, "logiter": 41, "log_fil": 41, "_popen": 41, "signalexcept": 41, "sigval": 41, "signal": 41, "got": 41, "feedback": 42, "edit": 42, "pak": 42, "lsf_schedul": 42, "lsfschedul": 42, "lsfopt": 42, "mnt": 42, "tofix": 42, "On": 42, "reoslv": 42, "lsfbsub": 42, "ray_schedul": 43, "rayschedul": 43, "ray_client": 43, "jobsubmissioncli": 43, "tmpdirworkspacemixin": 43, "rayopt": 43, "actor": 43, "torchxignor": [43, 47], "overridden": 43, "dummi": 43, "rayjob": 43, "wait_until_finish": 43, "has_rai": 43, "indic": 43, "rayactor": 43, "output_filenam": 43, "working_dir": 43, "ray_common": 43, "ip": 43, "connect": 43, "ray_main": 43, "slurm_schedul": 44, "slurmschedul": 44, "dirworkspacemixin": [44, 47], "slurmopt": 44, "heterogen": 44, "sbatch": 44, "jobid": 44, "abl": 44, "schedmd": 44, "section_opt": 44, "inherit": 44, "activ": 44, "heterogeneous_job": 44, "snapshot": 44, "1gb": 44, "realmemori": 44, "workaround": 44, "parallelclust": 44, "2198": 44, "slurmbatchrequest": 44, "slurmreplicarequest": 44, "srun_opt": 44, "sbatch_opt": 44, "classmethod": 44, "from_rol": 44, "nomem": 44, "srun": 44, "treatment": 45, "min_replica": 45, "base_imag": 45, "miss": 45, "bindmount": 45, "volumemount": 45, "devicemount": 45, "duti": 45, "ps": 45, "bundl": 45, "dictat": 45, "ball": 45, "my_imag": 45, "env_var": 45, "500": 45, "tcp_store": 45, "8080": 45, "auto": 45, "scale": 45, "give": 45, "least": 45, "9090": 45, "pre_proc": 45, "encount": 45, "unsuccess": 45, "hardwar": 45, "caveat": 45, "surviv": 45, "untouch": 45, "membership": 45, "departur": 45, "admitt": 45, "physic": 45, "ram": 45, "predec": 45, "registr": 45, "retriev": 45, "gpu_x_1": 45, "named_resources_aw": 45, "taken": 45, "mere": 45, "equval": 45, "mem": 45, "aws_t3": 45, "medium": 45, "aws_m5": 45, "8xlarg": 45, "aws_m5_2xlarg": 45, "aws_p3_2xlarg": 45, "aws_p3_8xlarg": 45, "aws_t3_medium": 45, "mention": 45, "image_root_dir": 45, "train_app": 45, "rank0_env": 45, "base_img_root": 45, "accept": 45, "run_config_opt": 45, "run_as_us": 45, "type_": 45, "cluster_id": 45, "preemptibl": 45, "illeg": 45, "bad_typ": 45, "cfg_kei": 45, "cfg_from_str": 45, "cfg_str": 45, "cast": 45, "appropri": 45, "unknown": 45, "cfg_liter": 45, "kv": 45, "semi": 45, "colon": 45, "cfgval": 45, "trail": 45, "strictli": 45, "correct": 45, "is_typ": 45, "tp": 45, "isinst": 45, "text": 45, "recent": 45, "filter_rol": 45, "appstatuserror": 45, "pend": 45, "yet": [45, 46], "unsuccessfulli": 45, "replicast": 45, "alia": 45, "src_path": 45, "dst_path": 45, "read_onli": 45, "mknode": 45, "file_lint": 45, "component_funct": 45, "lintermessag": 45, "vaidat": 45, "stypl": 45, "get_fn_docstr": 45, "char": 45, "torchfunctionvisitor": 45, "component_function_nam": 45, "visitor": 45, "torchxfunctionargsvalid": 45, "criteria": 45, "primitive_typ": 45, "visit_functiondef": 45, "functiondef": 45, "torchxargumenthelpformatt": 45, "prog": 45, "indent_incr": 45, "max_help_posit": 45, "width": 45, "formatt": 45, "app_specs_func_def": 45, "torchxfunctionvalid": 45, "torchxreturnvalid": 45, "practition": 46, "conceptu": 46, "uniform": 46, "solut": 46, "tracker_nam": 46, "inject": 46, "entry_point_or_module_factory_method": 46, "tracker1": 46, "tracker2": 46, "backend_2_entry_point": 46, "tracker3": 46, "mlflow": 46, "create_track": 46, "my_bucket": 46, "my_config": 46, "discover": 46, "accomplish": 46, "entry_point_nam": 46, "create_tracker_fn": 46, "app_run_from_env": 46, "torchx_job_id": 46, "app_run": 46, "fsspectrack": 46, "cmdtracker": 46, "parent": 46, "run_id": 46, "artifact_nam": 46, "consumpt": 46, "encapsul": 46, "stil": 46, "abstractfilesystem": [46, 47], "root_dir": 46, "backward": 46, "gurante": 46, "subdir": 46, "descend": 46, "cmd_tracker": 46, "workspacemixin": 47, "mix": 47, "abil": 47, "codebas": 47, "build_workspace_and_update_rol": 47, "simplest": 47, "effici": 47, "increment": 47, "mutat": 47, "dryrun_push_imag": 47, "dryrun_push": 47, "push_imag": 47, "workspace_opt": 47, "walk_workspac": 47, "ignore_nam": 47, "engin": 47, "builder": 47, "exclud": 47, "whose": 47, "_update_app_imag": 47, "_push_imag": 47}, "objects": {"torchx": [[3, 0, 0, "-", "cli"], [8, 0, 0, "-", "components"], [26, 0, 0, "-", "pipelines"], [30, 0, 0, "-", "runner"], [32, 0, 0, "-", "runtime"], [34, 0, 0, "-", "schedulers"], [45, 0, 0, "-", "specs"], [46, 0, 0, "-", "tracker"], [47, 0, 0, "-", "workspace"]], "torchx.cli.cmd_tracker": [[46, 1, 1, "", "CmdTracker"]], "torchx.components": [[4, 0, 0, "-", "component_test_base"], [5, 0, 0, "-", "dist"], [6, 0, 0, "-", "interpret"], [7, 0, 0, "-", "metrics"], [9, 0, 0, "-", "serve"], [10, 0, 0, "-", "train"], [11, 0, 0, "-", "utils"]], "torchx.components.component_test_base": [[4, 1, 1, "", "ComponentTestCase"]], "torchx.components.component_test_base.ComponentTestCase": [[4, 2, 1, "", "run_component"], [4, 2, 1, "", "setUp"], [4, 2, 1, "", "tearDown"], [4, 2, 1, "", "validate"]], "torchx.components.dist": [[5, 3, 1, "", "_TORCH_DEBUG_FLAGS"], [5, 4, 1, "", "ddp"]], "torchx.components.metrics": [[7, 4, 1, "", "tensorboard"]], "torchx.components.serve": [[9, 4, 1, "", "torchserve"]], "torchx.components.utils": [[11, 4, 1, "", "binary"], [11, 4, 1, "", "booth"], [11, 4, 1, "", "copy"], [11, 4, 1, "", "echo"], [11, 4, 1, "", "python"], [11, 4, 1, "", "sh"], [11, 4, 1, "", "touch"]], "torchx.pipelines": [[28, 0, 0, "-", "kfp"]], "torchx.pipelines.kfp.adapter": [[28, 1, 1, "", "ContainerFactory"], [28, 4, 1, "", "component_from_app"], [28, 4, 1, "", "component_spec_from_app"], [28, 4, 1, "", "container_from_app"], [28, 4, 1, "", "resource_from_app"]], "torchx.runner": [[30, 1, 1, "", "Runner"], [31, 0, 0, "-", "config"], [30, 4, 1, "", "get_runner"]], "torchx.runner.Runner": [[30, 2, 1, "", "cancel"], [30, 2, 1, "", "close"], [30, 2, 1, "", "describe"], [30, 2, 1, "", "dryrun"], [30, 2, 1, "", "dryrun_component"], [30, 2, 1, "", "list"], [30, 2, 1, "", "log_lines"], [30, 2, 1, "", "run"], [30, 2, 1, "", "run_component"], [30, 2, 1, "", "schedule"], [30, 2, 1, "", "scheduler_backends"], [30, 2, 1, "", "scheduler_run_opts"], [30, 2, 1, "", "status"], [30, 2, 1, "", "stop"], [30, 2, 1, "", "wait"]], "torchx.runner.config": [[31, 4, 1, "", "apply"], [31, 4, 1, "", "dump"], [31, 4, 1, "", "find_configs"], [31, 4, 1, "", "get_config"], [31, 4, 1, "", "get_configs"], [31, 4, 1, "", "load"], [31, 4, 1, "", "load_sections"]], "torchx.runtime": [[33, 0, 0, "-", "tracking"]], "torchx.runtime.tracking": [[33, 1, 1, "", "FsspecResultTracker"], [33, 1, 1, "", "ResultTracker"]], "torchx.schedulers": [[34, 1, 1, "", "Scheduler"], [34, 1, 1, "", "SchedulerFactory"], [35, 0, 0, "-", "aws_batch_scheduler"], [36, 0, 0, "-", "aws_sagemaker_scheduler"], [37, 0, 0, "-", "docker_scheduler"], [38, 0, 0, "-", "gcp_batch_scheduler"], [34, 4, 1, "", "get_default_scheduler_name"], [34, 4, 1, "", "get_scheduler_factories"], [40, 0, 0, "-", "kubernetes_mcad_scheduler"], [39, 0, 0, "-", "kubernetes_scheduler"], [41, 0, 0, "-", "local_scheduler"], [42, 0, 0, "-", "lsf_scheduler"], [43, 0, 0, "-", "ray_scheduler"], [44, 0, 0, "-", "slurm_scheduler"]], "torchx.schedulers.Scheduler": [[34, 2, 1, "", "cancel"], [34, 2, 1, "", "close"], [34, 2, 1, "", "describe"], [34, 2, 1, "", "exists"], [34, 2, 1, "", "list"], [34, 2, 1, "", "log_iter"], [34, 2, 1, "", "run_opts"], [34, 2, 1, "", "schedule"], [34, 2, 1, "", "submit"], [34, 2, 1, "", "submit_dryrun"]], "torchx.schedulers.api": [[34, 1, 1, "", "DescribeAppResponse"], [34, 1, 1, "", "ListAppResponse"]], "torchx.schedulers.aws_batch_scheduler": [[35, 1, 1, "", "AWSBatchScheduler"], [35, 1, 1, "", "BatchJob"], [35, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_batch_scheduler.AWSBatchScheduler": [[35, 2, 1, "", "describe"], [35, 2, 1, "", "list"], [35, 2, 1, "", "log_iter"], [35, 2, 1, "", "schedule"]], "torchx.schedulers.aws_sagemaker_scheduler": [[36, 1, 1, "", "AWSSageMakerJob"], [36, 1, 1, "", "AWSSageMakerScheduler"], [36, 4, 1, "", "create_scheduler"]], "torchx.schedulers.aws_sagemaker_scheduler.AWSSageMakerScheduler": [[36, 2, 1, "", "describe"], [36, 2, 1, "", "list"], [36, 2, 1, "", "log_iter"], [36, 2, 1, "", "schedule"]], "torchx.schedulers.docker_scheduler": [[37, 1, 1, "", "DockerContainer"], [37, 1, 1, "", "DockerJob"], [37, 1, 1, "", "DockerScheduler"], [37, 4, 1, "", "create_scheduler"], [37, 4, 1, "", "has_docker"]], "torchx.schedulers.docker_scheduler.DockerScheduler": [[37, 2, 1, "", "describe"], [37, 2, 1, "", "list"], [37, 2, 1, "", "log_iter"], [37, 2, 1, "", "schedule"]], "torchx.schedulers.gcp_batch_scheduler": [[38, 1, 1, "", "GCPBatchJob"], [38, 1, 1, "", "GCPBatchScheduler"], [38, 4, 1, "", "create_scheduler"]], "torchx.schedulers.gcp_batch_scheduler.GCPBatchScheduler": [[38, 2, 1, "", "describe"], [38, 2, 1, "", "list"], [38, 2, 1, "", "log_iter"], [38, 2, 1, "", "run_opts"], [38, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_mcad_scheduler": [[40, 1, 1, "", "KubernetesMCADJob"], [40, 1, 1, "", "KubernetesMCADScheduler"], [40, 4, 1, "", "app_to_resource"], [40, 4, 1, "", "create_scheduler"], [40, 4, 1, "", "get_appwrapper_status"], [40, 4, 1, "", "get_port_for_service"], [40, 4, 1, "", "get_role_information"], [40, 4, 1, "", "get_tasks_status_description"], [40, 4, 1, "", "mcad_svc"], [40, 4, 1, "", "pod_labels"], [40, 4, 1, "", "role_to_pod"], [40, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_mcad_scheduler.KubernetesMCADScheduler": [[40, 2, 1, "", "describe"], [40, 2, 1, "", "list"], [40, 2, 1, "", "log_iter"], [40, 2, 1, "", "run_opts"], [40, 2, 1, "", "schedule"]], "torchx.schedulers.kubernetes_scheduler": [[39, 1, 1, "", "KubernetesJob"], [39, 1, 1, "", "KubernetesScheduler"], [39, 4, 1, "", "app_to_resource"], [39, 4, 1, "", "create_scheduler"], [39, 4, 1, "", "pod_labels"], [39, 4, 1, "", "role_to_pod"], [39, 4, 1, "", "sanitize_for_serialization"]], "torchx.schedulers.kubernetes_scheduler.KubernetesScheduler": [[39, 2, 1, "", "describe"], [39, 2, 1, "", "list"], [39, 2, 1, "", "log_iter"], [39, 2, 1, "", "schedule"]], "torchx.schedulers.local_scheduler": [[41, 1, 1, "", "CWDImageProvider"], [41, 1, 1, "", "ImageProvider"], [41, 1, 1, "", "LocalDirectoryImageProvider"], [41, 1, 1, "", "LocalScheduler"], [41, 1, 1, "", "LogIterator"], [41, 1, 1, "", "PopenRequest"], [41, 1, 1, "", "ReplicaParam"], [41, 1, 1, "", "SignalException"], [41, 4, 1, "", "create_scheduler"]], "torchx.schedulers.local_scheduler.CWDImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.ImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "fetch_role"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"], [41, 2, 1, "", "get_replica_param"]], "torchx.schedulers.local_scheduler.LocalDirectoryImageProvider": [[41, 2, 1, "", "fetch"], [41, 2, 1, "", "get_cwd"], [41, 2, 1, "", "get_entrypoint"]], "torchx.schedulers.local_scheduler.LocalScheduler": [[41, 2, 1, "", "auto_set_CUDA_VISIBLE_DEVICES"], [41, 2, 1, "", "close"], [41, 2, 1, "", "describe"], [41, 2, 1, "", "list"], [41, 2, 1, "", "log_iter"], [41, 2, 1, "", "schedule"]], "torchx.schedulers.lsf_scheduler": [[42, 1, 1, "", "LsfBsub"], [42, 1, 1, "", "LsfScheduler"], [42, 4, 1, "", "create_scheduler"]], "torchx.schedulers.lsf_scheduler.LsfScheduler": [[42, 2, 1, "", "describe"], [42, 2, 1, "", "list"], [42, 2, 1, "", "log_iter"], [42, 2, 1, "", "schedule"]], "torchx.schedulers.ray_scheduler": [[43, 1, 1, "", "RayJob"], [43, 1, 1, "", "RayScheduler"], [43, 4, 1, "", "create_scheduler"], [43, 4, 1, "", "has_ray"], [43, 4, 1, "", "serialize"]], "torchx.schedulers.ray_scheduler.RayScheduler": [[43, 2, 1, "", "describe"], [43, 2, 1, "", "list"], [43, 2, 1, "", "log_iter"], [43, 2, 1, "", "schedule"], [43, 2, 1, "", "wait_until_finish"]], "torchx.schedulers.slurm_scheduler": [[44, 1, 1, "", "SlurmBatchRequest"], [44, 1, 1, "", "SlurmReplicaRequest"], [44, 1, 1, "", "SlurmScheduler"], [44, 4, 1, "", "create_scheduler"]], "torchx.schedulers.slurm_scheduler.SlurmBatchRequest": [[44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmReplicaRequest": [[44, 2, 1, "", "from_role"], [44, 2, 1, "", "materialize"]], "torchx.schedulers.slurm_scheduler.SlurmScheduler": [[44, 2, 1, "", "describe"], [44, 2, 1, "", "list"], [44, 2, 1, "", "log_iter"], [44, 2, 1, "", "schedule"]], "torchx.specs": [[45, 1, 1, "", "AppDef"], [45, 1, 1, "", "AppState"], [45, 1, 1, "", "AppStatus"], [45, 1, 1, "", "BindMount"], [45, 1, 1, "", "DeviceMount"], [45, 5, 1, "", "ReplicaState"], [45, 1, 1, "", "Resource"], [45, 1, 1, "", "RetryPolicy"], [45, 1, 1, "", "Role"], [45, 1, 1, "", "VolumeMount"], [45, 0, 0, "-", "file_linter"], [45, 4, 1, "", "get_named_resources"], [45, 1, 1, "", "macros"], [45, 0, 0, "-", "named_resources_aws"], [45, 4, 1, "", "parse_mounts"], [45, 4, 1, "", "resource"], [45, 1, 1, "", "runopts"]], "torchx.specs.AppStatus": [[45, 2, 1, "", "format"], [45, 2, 1, "", "raise_for_status"]], "torchx.specs.Resource": [[45, 2, 1, "", "copy"]], "torchx.specs.Role": [[45, 2, 1, "", "pre_proc"]], "torchx.specs.file_linter": [[45, 1, 1, "", "LinterMessage"], [45, 1, 1, "", "TorchFunctionVisitor"], [45, 1, 1, "", "TorchXArgumentHelpFormatter"], [45, 1, 1, "", "TorchxFunctionArgsValidator"], [45, 1, 1, "", "TorchxFunctionValidator"], [45, 1, 1, "", "TorchxReturnValidator"], [45, 4, 1, "", "get_fn_docstring"], [45, 4, 1, "", "validate"]], "torchx.specs.file_linter.TorchFunctionVisitor": [[45, 2, 1, "", "visit_FunctionDef"]], "torchx.specs.file_linter.TorchxFunctionArgsValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxFunctionValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.file_linter.TorchxReturnValidator": [[45, 2, 1, "", "validate"]], "torchx.specs.macros": [[45, 1, 1, "", "Values"]], "torchx.specs.macros.Values": [[45, 2, 1, "", "apply"], [45, 2, 1, "", "substitute"]], "torchx.specs.named_resources_aws": [[45, 4, 1, "", "aws_m5_2xlarge"], [45, 4, 1, "", "aws_p3_2xlarge"], [45, 4, 1, "", "aws_p3_8xlarge"], [45, 4, 1, "", "aws_t3_medium"]], "torchx.specs.runopts": [[45, 2, 1, "", "add"], [45, 2, 1, "", "cfg_from_str"], [45, 2, 1, "", "get"], [45, 2, 1, "", "is_type"], [45, 2, 1, "", "resolve"]], "torchx.tracker": [[46, 1, 1, "", "AppRun"]], "torchx.tracker.api": [[46, 1, 1, "", "TrackerBase"]], "torchx.tracker.backend.fsspec": [[46, 1, 1, "", "FsspecTracker"]], "torchx.workspace": [[47, 1, 1, "", "WorkspaceMixin"], [47, 0, 0, "-", "dir_workspace"], [47, 0, 0, "-", "docker_workspace"], [47, 4, 1, "", "walk_workspace"]], "torchx.workspace.WorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]], "torchx.workspace.dir_workspace": [[47, 1, 1, "", "DirWorkspaceMixin"]], "torchx.workspace.dir_workspace.DirWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"]], "torchx.workspace.docker_workspace": [[47, 1, 1, "", "DockerWorkspaceMixin"]], "torchx.workspace.docker_workspace.DockerWorkspaceMixin": [[47, 2, 1, "", "build_workspace_and_update_role"], [47, 2, 1, "", "dryrun_push_images"], [47, 2, 1, "", "push_images"], [47, 2, 1, "", "workspace_opts"]]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:method", "3": "py:data", "4": "py:function", "5": "py:attribute"}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "method", "Python method"], "3": ["py", "data", "Python data"], "4": ["py", "function", "Python function"], "5": ["py", "attribute", "Python attribute"]}, "titleterms": {"advanc": [0, 22, 46], "usag": [0, 14, 17, 20, 25, 31, 33, 46], "regist": 0, "custom": [0, 8, 12, 29], "schedul": [0, 2, 3, 25, 29, 34], "name": [0, 4, 45], "resourc": [0, 4, 8, 45], "compon": [0, 2, 3, 4, 5, 8, 12, 22, 25, 45], "app": [1, 14], "best": [1, 4, 25], "practic": [1, 4, 25], "data": [1, 14, 15, 46], "pass": [1, 8], "storag": 1, "train": [1, 10], "loop": 1, "metric": [1, 7], "checkpoint": 1, "fine": 1, "tune": 1, "interpret": [1, 6, 17], "model": [1, 17, 18], "packag": 1, "python": 1, "save": 1, "weight": 1, "torchscript": 1, "torchserv": 1, "archiv": 1, "mar": 1, "torch": 1, "serv": [1, 9], "infer": 1, "test": [1, 4], "basic": 2, "concept": 2, "project": 2, "structur": 2, "appdef": [2, 45], "runner": [2, 30], "pipelin": [2, 8, 21, 22, 23, 24, 25, 26, 28], "adapt": 2, "runtim": [2, 25], "next": [2, 27, 29], "step": [2, 27, 29], "cli": [3, 8, 31], "list": 3, "builtin": [3, 5, 8, 12], "support": 3, "argument": [3, 4, 22], "run": [3, 8, 45], "job": [3, 8, 46], "inspect": 3, "what": 3, "dryrun": 3, "describ": 3, "queri": [3, 46], "statu": [3, 45], "view": 3, "log": [3, 19], "entrypoint": 4, "simplifi": 4, "process": 4, "branch": 4, "logic": 4, "document": [4, 25], "compos": 4, "distribut": [4, 5, 23, 29], "defin": 4, "all": [4, 26, 34], "unit": 4, "integr": 4, "ddp": 5, "api": [5, 25, 31], "refer": [7, 25, 35, 36, 37, 38, 39, 40, 41, 42, 46], "overview": [8, 32, 33, 46], "us": 8, "author": 8, "valid": 8, "programmat": [8, 31], "param": 8, "from": 8, "addit": 8, "embed": 10, "script": 10, "util": 11, "hello": [12, 29], "world": [12, 13, 15, 29], "comput": [13, 15], "size": [13, 15], "exampl": [13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 46], "preprocess": [14, 15], "applic": [15, 25], "prerequisit": [15, 35, 38, 39, 40, 42], "lightn": 15, "trainer": [15, 16, 20], "dataset": 16, "tini": 18, "imagenet": 18, "simpl": 19, "profil": 19, "kubeflow": [21, 22, 23, 24, 28], "input": 22, "creat": 22, "definit": 22, "intro": 24, "torchx": [25, 26, 28, 30, 34, 45, 46, 47], "In": 25, "1": [25, 46], "2": [25, 46], "3": 25, "work": 25, "With": 25, "librari": 25, "airflow": 27, "kfp": 28, "quickstart": 29, "instal": 29, "launch": 29, "workspac": [29, 47], "patch": 29, "torchxconfig": [29, 31], "remot": 29, "imag": [29, 41], "docker": [29, 37], "base": [29, 33], "slurm": [29, 44], "function": [30, 31, 34], "class": [30, 34], "config": [31, 45], "track": 33, "resulttrack": 33, "fsspec": 33, "aw": [35, 36, 45], "batch": [35, 38], "sagemak": 36, "gcp": 38, "kubernet": [39, 40], "mcad": 40, "local": 41, "provid": 41, "ibm": 42, "spectrum": 42, "lsf": 42, "rai": 43, "spec": 45, "role": 45, "macro": 45, "mount": 45, "linter": 45, "tracker": 46, "setup": 46, "launcher": 46, "side": 46, "configur": 46, "user": 46, "acquir": 46, "apprun": 46, "instanc": 46, "trackerbas": 46, "implement": 46, "docker_workspac": 47, "dir_workspac": 47}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 6, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.todo": 2, "sphinx.ext.viewcode": 1, "nbsphinx": 4, "sphinx.ext.intersphinx": 1, "sphinx": 56}}) \ No newline at end of file