From 0ebb796aa70b6bd4d1d974f77cbe8fd4ce1f6255 Mon Sep 17 00:00:00 2001 From: erikhuck Date: Sun, 31 Mar 2024 22:20:23 -0400 Subject: [PATCH] Updates the tutorial and other documentation --- README.rst | 2 +- docs/cli.rst | 2 +- docs/conf.py | 1 + docs/notebook/tutorial.ipynb | 484 +++++++++++++++++++++++++++++------ docs/tutorial.rst | 383 ++++++++++++++++++++++----- src/gpu_tracker/__main__.py | 5 +- src/gpu_tracker/tracker.py | 87 ++++--- 7 files changed, 795 insertions(+), 169 deletions(-) diff --git a/README.rst b/README.rst index 7d6723f..bbc6638 100644 --- a/README.rst +++ b/README.rst @@ -3,7 +3,7 @@ gpu_tracker ########### Description ----------- -The ``gpu_tracker`` package provides a ``Tracker`` class that tracks (profiles) the usage of compute time, maximum RAM, and maximum GPU RAM. +The ``gpu_tracker`` package provides a ``Tracker`` class and a commandline-interface that tracks (profiles) the usage of compute time, maximum RAM, and maximum GPU RAM. The compute time is a measurement of the real time taken by the task as opposed to the CPU-utilization time. The GPU tracking is for Nvidia GPUs and uses the ``nvidia-smi`` command, assuming the Nvidia drivers have been installed. Computational resources are tracked throughout the duration of a context manager or the duration of explicit calls to the ``start()`` and ``stop()`` methods of the ``Tracker`` class. diff --git a/docs/cli.rst b/docs/cli.rst index 062e2a9..3440429 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -3,7 +3,7 @@ CLI The ``gpu-tracker`` command-line interface allows tracking computational-resource-usage of an arbitrary shell command. For example, one may want to profile a command that runs a script or a command ran in a high-performance-computing job. Below is the help message shown from ``gpu-tracker --help``. -See the :ref:`tutorial-label` for examples of using the CLI. +See the CLI section of the :ref:`tutorial-label` for examples of using the CLI. .. 
literalinclude:: ../src/gpu_tracker/__main__.py :start-at: Usage: diff --git a/docs/conf.py b/docs/conf.py index 6528130..0c64f09 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -51,6 +51,7 @@ def setup(app): autodoc_typehints = 'both' autoclass_content = 'both' +autodoc_member_order = 'bysource' html_theme = 'sphinx_rtd_theme' html_static_path = ['_static'] diff --git a/docs/notebook/tutorial.ipynb b/docs/notebook/tutorial.ipynb index 68267e9..97427e0 100644 --- a/docs/notebook/tutorial.ipynb +++ b/docs/notebook/tutorial.ipynb @@ -21,7 +21,7 @@ "id": "2bb9e84a-8523-4e5f-bc01-1d6b234c19a6", "metadata": {}, "source": [ - "The `gpu_tracker` package provides the `Tracker` class which uses an underlying thread to measure computational resource usage, namely the compute time, maximum RAM used, and maximum GPU RAM used. The `start()` method starts this thread which tracks usage in the background. After calling `start()`, write the code to measure resource usage, followed by calling the `stop()` method. The compute time will be the time from the call to `start()` to the call to `stop()` and the RAM and GPU RAM quantities will be the amount of RAM used by the code that's in between `start()` and `stop()`. The `Tracker` class additionally has a `__str__` method so it can be printed as a string that formats the values and units of each computational resource." + "The `gpu_tracker` package provides the `Tracker` class which uses an underlying thread to measure computational resource usage, namely the compute time, maximum RAM used, and maximum GPU RAM used. The `start()` method starts this thread which tracks usage in the background. After calling `start()`, write the code to measure resource usage, followed by calling the `stop()` method. The compute time will be the time from the call to `start()` to the call to `stop()` and the RAM and GPU RAM quantities will be the amount of RAM used by the code that's in between `start()` and `stop()`." 
] }, { @@ -44,36 +44,94 @@ "execution_count": 2, "id": "1c59d6dc-2e8c-4d5a-ac80-ab85bb2c62a5", "metadata": {}, + "outputs": [], + "source": [ + "tracker = gput.Tracker()\n", + "tracker.start()\n", + "# Perform expensive operations\n", + "tracker.stop()" + ] + }, + { + "cell_type": "markdown", + "id": "0a02518b-f55a-4119-8fc1-2daac6429e44", + "metadata": {}, + "source": [ + "The `Tracker` class implements the `__str__` method so it can be printed as a string with the values and units of each computational resource formatted." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "710e11da-7a70-49fe-be6d-c1d25ce9d53f", + "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Max RAM: 0.067 gigabytes\n", - "Max GPU RAM: 0.000 gigabytes\n", - "Compute time: 0.000 hours\n" + "Max RAM:\n", + " Unit: gigabytes\n", + " System capacity: 67.254\n", + " System: 4.417\n", + " Main:\n", + " Total RSS: 0.061\n", + " Private RSS: 0.05\n", + " Shared RSS: 0.011\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 0.063\n", + " Private RSS: 0.052\n", + " Shared RSS: 0.011\n", + "Max GPU RAM:\n", + " Unit: gigabytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: hours\n", + " Time: 0.0\n" ] } ], "source": [ - "tracker = gput.Tracker()\n", - "tracker.start()\n", - "# Perform expensive operations\n", - "tracker.stop()\n", "print(tracker)" ] }, + { + "cell_type": "markdown", + "id": "fc7819ab-a3fc-4a29-8a5c-1790e192a465", + "metadata": {}, + "source": [ + "The system capacity is a constant for the total RAM capacity across the entire operating system, not to be confused with the maximum system RAM which is the maximum OS RAM that was actually used over the duration of the computational-resource tracking. 
Both the RAM and GPU RAM are split up into 3 sections, namely the usage of the main process itself followed by the summed usage of any descendent processes it may have (i.e. child processes, grandchild processes, etc.), and combined usage which is the sum of the main and its descendent processes. RAM is divided further to include the private RSS (RAM usage unique to the process), shared RSS (RAM that's shared by a process and at least one other process), and total RSS (the sum of private and shared RSS). The private and shared RSS values are only available on Linux distributions. So for non-linux operating systems, the private and shared RSS will remain 0 and only the total RSS will be reported. Theoretically, the combined total RSS would never exceed the overall system RAM usage, but inaccuracies resulting from shared RSS can cause this to happen, especially for non-linux operating systems (see note below).\n", + "\n", + "The `Tracker` assumes that GPU memory is not shared across multiple processes and if it is, the reported GPU RAM of \"descendent\" and \"combined\" may be an overestimation.\n", + "\n", + "The compute time is the real time that the computational-resource tracking lasted (as compared to CPU time)." + ] + }, + { + "cell_type": "markdown", + "id": "9eb6f048-68cf-4dc5-b11d-cdcdf39bde3a", + "metadata": {}, + "source": [ + "***NOTE** The keywords \"descendents\" and \"combined\" in the output above indicate a sum of the RSS used by multiple processes. It's important to keep in mind that on non-linux operating systems, this sum does not take into account shared memory but rather adds up the total RSS of all processes, which can lead to an overestimation. 
For Linux distributions, however, pieces of shared memory are only counted once.*" + ] + }, { "cell_type": "markdown", "id": "dbf7ad83-21d0-4cd2-adbb-278fa80d2b13", "metadata": {}, "source": [ - "The equivalent can be accomplished using `Tracker` as a context manager rather than explicitly calling `start()` and `stop()`." + "The `Tracker` can alternatively be used as a context manager rather than explicitly calling `start()` and `stop()`." ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "08f688f4-f1b6-41dc-91f9-76f9c7e0fdb3", "metadata": {}, "outputs": [ @@ -81,9 +139,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Max RAM: 0.067 gigabytes\n", - "Max GPU RAM: 0.000 gigabytes\n", - "Compute time: 0.000 hours\n" + "Max RAM:\n", + " Unit: gigabytes\n", + " System capacity: 67.254\n", + " System: 4.398\n", + " Main:\n", + " Total RSS: 0.063\n", + " Private RSS: 0.052\n", + " Shared RSS: 0.011\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 0.063\n", + " Private RSS: 0.052\n", + " Shared RSS: 0.011\n", + "Max GPU RAM:\n", + " Unit: gigabytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: hours\n", + " Time: 0.0\n" ] } ], @@ -104,7 +183,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "09201850-de04-4864-a7e2-3747d4fb9b3a", "metadata": {}, "outputs": [ @@ -112,9 +191,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Max RAM: 67.391 megabytes\n", - "Max GPU RAM: 0.000 kilobytes\n", - "Compute time: 1.035 seconds\n" + "Max RAM:\n", + " Unit: megabytes\n", + " System capacity: 67254.161\n", + " System: 4378.161\n", + " Main:\n", + " Total RSS: 63.336\n", + " Private RSS: 52.171\n", + " Shared RSS: 11.166\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 63.341\n", + " 
Private RSS: 52.175\n", + " Shared RSS: 11.166\n", + "Max GPU RAM:\n", + " Unit: kilobytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: seconds\n", + " Time: 0.063\n" ] } ], @@ -130,59 +230,191 @@ "id": "2323016e-f390-4584-b540-a330af5b635e", "metadata": {}, "source": [ - "The same information can be obtained in a dictionary via the `Tracker`'s `to_json()` method." + "The same information as the text format can be provided as a dictionary via the `to_json()` method of the `Tracker`." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "85291150-bac7-4057-8ce6-35dfb6badb88", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"max_ram\": {\n", + " \"unit\": \"megabytes\",\n", + " \"system_capacity\": 67254.161408,\n", + " \"system\": 4378.161152,\n", + " \"main\": {\n", + " \"total_rss\": 63.336448,\n", + " \"private_rss\": 52.170752,\n", + " \"shared_rss\": 11.165695999999999\n", + " },\n", + " \"descendents\": {\n", + " \"total_rss\": 0.0,\n", + " \"private_rss\": 0.0,\n", + " \"shared_rss\": 0.0\n", + " },\n", + " \"combined\": {\n", + " \"total_rss\": 63.340543999999994,\n", + " \"private_rss\": 52.174848,\n", + " \"shared_rss\": 11.165695999999999\n", + " }\n", + " },\n", + " \"max_gpu_ram\": {\n", + " \"unit\": \"kilobytes\",\n", + " \"main\": 0.0,\n", + " \"descendents\": 0.0,\n", + " \"combined\": 0.0\n", + " },\n", + " \"compute_time\": {\n", + " \"unit\": \"seconds\",\n", + " \"time\": 0.06275105476379395\n", + " }\n", + "}\n" + ] + } + ], + "source": [ + "import json\n", + "print(json.dumps(tracker.to_json(), indent=1))" + ] + }, + { + "cell_type": "markdown", + "id": "3ab1039a-09bc-44b7-b106-37e6986869d4", + "metadata": {}, + "source": [ + "The `Tracker` class additionally has fields that provide the usage information for each computational resource as python data classes. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "882210f5-233f-4ca9-b2ca-8b187d0d7d7e", + "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'max_ram': 67.391488,\n", - " 'ram_unit': 'megabytes',\n", - " 'max_gpu_ram': 0.0,\n", - " 'gpu_ram_unit': 'kilobytes',\n", - " 'compute_time': 1.0349853038787842,\n", - " 'time_unit': 'seconds'}" + "MaxRAM(unit='megabytes', system_capacity=67254.161408, system=4378.161152, main=RSSValues(total_rss=63.336448, private_rss=52.170752, shared_rss=11.165695999999999), descendents=RSSValues(total_rss=0.0, private_rss=0.0, shared_rss=0.0), combined=RSSValues(total_rss=63.340543999999994, private_rss=52.174848, shared_rss=11.165695999999999))" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tracker.to_json()" + "tracker.max_ram" ] }, { - "cell_type": "markdown", - "id": "d119d306-71e3-4d96-acf9-45e2e11e17d9", + "cell_type": "code", + "execution_count": 8, + "id": "4a380bd0-199c-4c50-9592-05519b00da7c", "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'megabytes'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "Additionally, the individual measurements and units are available as attributes in the `Tracker` class." 
+ "tracker.max_ram.unit" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "8dbfea09-d603-4a6a-aed3-5c9e62a03fda", + "execution_count": 9, + "id": "0e4c0b1a-8a4c-40d7-9270-a2274456575b", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "67.391488 megabytes\n" - ] + "data": { + "text/plain": [ + "RSSValues(total_rss=63.336448, private_rss=52.170752, shared_rss=11.165695999999999)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "print(tracker.max_ram, tracker.ram_unit)" + "tracker.max_ram.main" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "47e6c47b-586a-4534-9346-76fcf3f5ab83", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "63.336448" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tracker.max_ram.main.total_rss" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "421c5118-7421-4681-bd16-b9697c28078e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "MaxGPURAM(unit='kilobytes', main=0.0, descendents=0.0, combined=0.0)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tracker.max_gpu_ram" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8d047dfc-f7f8-4906-964a-16fd725f6e4b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ComputeTime(unit='seconds', time=0.06275105476379395)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tracker.compute_time" ] }, { @@ -203,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "id": "f7deae10-a16b-4a38-acc6-c354911200ab", "metadata": {}, "outputs": [ @@ -214,7 +446,7 @@ "Tracks the computational resource usage (RAM, GPU RAM, and compute time) of a process corresponding to a 
given shell command.\n", "\n", "Usage:\n", - " gpu-tracker --execute= [--output=] [--format=] [--st=] [--ic] [--ru=] [--gru=] [--tu=]\n", + " gpu-tracker --execute= [--output=] [--format=] [--st=] [--ru=] [--gru=] [--tu=]\n", "\n", "Options:\n", " -h --help Show this help message.\n", @@ -222,7 +454,6 @@ " -o --output= File path to store the computational-resource-usage measurements. If not set, prints measurements to the screen.\n", " -f --format= File format of the output. Either 'json' or 'text'. Defaults to 'text'.\n", " --st= The number of seconds to sleep in between usage-collection iterations.\n", - " --ic Stands for include-children; Whether to add the usage (RAM and GPU RAM) of child processes. Otherwise, only collects usage of the main process.\n", " --ru= One of 'bytes', 'kilobytes', 'megabytes', 'gigabytes', or 'terabytes'.\n", " --gru= One of 'bytes', 'kilobytes', 'megabytes', 'gigabytes', or 'terabytes'.\n", " --tu= One of 'seconds', 'minutes', 'hours', or 'days'.\n" @@ -243,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 14, "id": "ea7c710f-a238-460d-836c-a979e1c72f4f", "metadata": {}, "outputs": [ @@ -252,10 +483,30 @@ "output_type": "stream", "text": [ "Resource tracking complete. 
Process completed with status code: 0\n", - "Max RAM: 0.002 gigabytes\n", - "Max GPU RAM: 0.000 gigabytes\n", - "Compute time: 0.001 hours\n", - "\n" + "Max RAM:\n", + " Unit: gigabytes\n", + " System capacity: 67.254\n", + " System: 4.398\n", + " Main:\n", + " Total RSS: 0.002\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.002\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 0.002\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.002\n", + "Max GPU RAM:\n", + " Unit: gigabytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: hours\n", + " Time: 0.0\n" ] } ], @@ -268,12 +519,12 @@ "id": "c45091e7-0e85-4a8d-8836-c2dce1bd547f", "metadata": {}, "source": [ - "Like with the API, the units can be modified. For example, --tu stands for time-unit and --ru stands for ram-unit." + "The units of the computational resources can be modified. For example, --tu stands for time-unit and --ru stands for ram-unit." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 15, "id": "cff099a7-1070-42ba-9f2a-008d58863fe6", "metadata": {}, "outputs": [ @@ -282,10 +533,30 @@ "output_type": "stream", "text": [ "Resource tracking complete. 
Process completed with status code: 0\n", - "Max RAM: 1.966 megabytes\n", - "Max GPU RAM: 0.000 gigabytes\n", - "Compute time: 2.045 seconds\n", - "\n" + "Max RAM:\n", + " Unit: megabytes\n", + " System capacity: 67254.161\n", + " System: 4425.966\n", + " Main:\n", + " Total RSS: 1.663\n", + " Private RSS: 0.139\n", + " Shared RSS: 1.524\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 1.663\n", + " Private RSS: 0.139\n", + " Shared RSS: 1.524\n", + "Max GPU RAM:\n", + " Unit: gigabytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: seconds\n", + " Time: 1.075\n" ] } ], @@ -303,7 +574,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 16, "id": "a8520fd9-0907-4c0c-a68f-8fdaec040e1a", "metadata": {}, "outputs": [ @@ -321,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 17, "id": "213550b7-d808-4e11-be37-f2f892e4834b", "metadata": {}, "outputs": [ @@ -329,9 +600,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Max RAM: 0.002 gigabytes\n", - "Max GPU RAM: 0.000 gigabytes\n", - "Compute time: 0.001 hours\n" + "Max RAM:\n", + " Unit: gigabytes\n", + " System capacity: 67.254\n", + " System: 4.414\n", + " Main:\n", + " Total RSS: 0.002\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.001\n", + " Descendents:\n", + " Total RSS: 0.0\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.0\n", + " Combined:\n", + " Total RSS: 0.002\n", + " Private RSS: 0.0\n", + " Shared RSS: 0.001\n", + "Max GPU RAM:\n", + " Unit: gigabytes\n", + " Main: 0.0\n", + " Descendents: 0.0\n", + " Combined: 0.0\n", + "Compute time:\n", + " Unit: hours\n", + " Time: 0.0" ] } ], @@ -349,7 +641,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "id": "f6fd29d2-cad6-4f9c-8af8-ccf4f0e721d3", "metadata": {}, "outputs": [ @@ -359,12 +651,36 @@ "text": [ "Resource tracking 
complete. Process completed with status code: 0\n", "{\n", - " \"max_ram\": 0.0019660800000000003,\n", - " \"ram_unit\": \"gigabytes\",\n", - " \"max_gpu_ram\": 0.0,\n", - " \"gpu_ram_unit\": \"gigabytes\",\n", - " \"compute_time\": 0.0005680449803670247,\n", - " \"time_unit\": \"hours\"\n", + " \"max_ram\": {\n", + " \"unit\": \"gigabytes\",\n", + " \"system_capacity\": 67.254161408,\n", + " \"system\": 4.414156800000001,\n", + " \"main\": {\n", + " \"total_rss\": 0.0015892480000000001,\n", + " \"private_rss\": 0.00013516800000000002,\n", + " \"shared_rss\": 0.0014540800000000002\n", + " },\n", + " \"descendents\": {\n", + " \"total_rss\": 0.0,\n", + " \"private_rss\": 0.0,\n", + " \"shared_rss\": 0.0\n", + " },\n", + " \"combined\": {\n", + " \"total_rss\": 0.0015892480000000001,\n", + " \"private_rss\": 0.00013516800000000002,\n", + " \"shared_rss\": 0.0014540800000000002\n", + " }\n", + " },\n", + " \"max_gpu_ram\": {\n", + " \"unit\": \"gigabytes\",\n", + " \"main\": 0.0,\n", + " \"descendents\": 0.0,\n", + " \"combined\": 0.0\n", + " },\n", + " \"compute_time\": {\n", + " \"unit\": \"hours\",\n", + " \"time\": 0.00029873490333557127\n", + " }\n", "}\n" ] } @@ -375,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 19, "id": "5c825e42-d100-4533-b218-c36f6380e6ed", "metadata": {}, "outputs": [ @@ -393,7 +709,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 20, "id": "c821972e-0bed-4245-8933-27b0b28589de", "metadata": {}, "outputs": [ @@ -402,12 +718,36 @@ "output_type": "stream", "text": [ "{\n", - " \"max_ram\": 0.0019660800000000003,\n", - " \"ram_unit\": \"gigabytes\",\n", - " \"max_gpu_ram\": 0.0,\n", - " \"gpu_ram_unit\": \"gigabytes\",\n", - " \"compute_time\": 0.0005686806970172458,\n", - " \"time_unit\": \"hours\"\n", + " \"max_ram\": {\n", + " \"unit\": \"gigabytes\",\n", + " \"system_capacity\": 67.254161408,\n", + " \"system\": 4.407083008,\n", + " \"main\": {\n", + " \"total_rss\": 
0.001830912,\n", + " \"private_rss\": 0.00013516800000000002,\n", + " \"shared_rss\": 0.0016957440000000001\n", + " },\n", + " \"descendents\": {\n", + " \"total_rss\": 0.0,\n", + " \"private_rss\": 0.0,\n", + " \"shared_rss\": 0.0\n", + " },\n", + " \"combined\": {\n", + " \"total_rss\": 0.001830912,\n", + " \"private_rss\": 0.00013516800000000002,\n", + " \"shared_rss\": 0.0016957440000000001\n", + " }\n", + " },\n", + " \"max_gpu_ram\": {\n", + " \"unit\": \"gigabytes\",\n", + " \"main\": 0.0,\n", + " \"descendents\": 0.0,\n", + " \"combined\": 0.0\n", + " },\n", + " \"compute_time\": {\n", + " \"unit\": \"hours\",\n", + " \"time\": 0.0002994798951678806\n", + " }\n", "}" ] } diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 8b0ffb2..3fe3c6c 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -14,10 +14,7 @@ background. After calling ``start()``, write the code to measure resource usage, followed by calling the ``stop()`` method. The compute time will be the time from the call to ``start()`` to the call to ``stop()`` and the RAM and GPU RAM quantities will be the amount of RAM -used by the code that’s in between ``start()`` and ``stop()``. The -``Tracker`` class additionally has a ``__str__`` method so it can be -printed as a string that formats the values and units of each -computational resource. +used by the code that’s in between ``start()`` and ``stop()``. .. code:: python3 @@ -29,18 +26,78 @@ computational resource. tracker.start() # Perform expensive operations tracker.stop() - print(tracker) +The ``Tracker`` class implements the ``__str__`` method so it can be +printed as a string with the values and units of each computational +resource formatted. -.. code:: none +.. code:: python3 - Max RAM: 0.067 gigabytes - Max GPU RAM: 0.000 gigabytes - Compute time: 0.000 hours + print(tracker) -The equivalent can be accomplished using ``Tracker`` as a context -manager rather than explicitly calling ``start()`` and ``stop()``. +.. 
code:: none + + Max RAM: + Unit: gigabytes + System capacity: 67.254 + System: 4.545 + Main: + Total RSS: 0.061 + Private RSS: 0.05 + Shared RSS: 0.011 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 0.063 + Private RSS: 0.052 + Shared RSS: 0.011 + Max GPU RAM: + Unit: gigabytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: hours + Time: 0.0 + + +The system capacity is a constant for the total RAM capacity across the +entire operating system, not to be confused with the maximum system RAM +which is the maximum OS RAM that was actually used over the duration of +the computational-resource tracking. Both the RAM and GPU RAM are split +up into 3 sections, namely the usage of the main process itself followed +by the summed usage of any descendent processes it may have (i.e. child +processes, grandchild processes, etc.), and combined usage which is the +sum of the main and its descendent processes. RAM is divided further to +include the private RSS (RAM usage unique to the process), shared RSS +(RAM that’s shared by a process and at least one other process), and +total RSS (the sum of private and shared RSS). The private and shared +RSS values are only available on Linux distributions. So for non-linux +operating systems, the private and shared RSS will remain 0 and only +the total RSS will be reported. Theoretically, the combined total RSS +would never exceed the overall system RAM usage, but inaccuracies +resulting from shared RSS can cause this to happen, especially for +non-linux operating systems (see note below). + +The ``Tracker`` assumes that GPU memory is not shared across multiple +processes and if it is, the reported GPU RAM of “descendent” and +“combined” may be an overestimation. + +The compute time is the real time that the computational-resource +tracking lasted (as compared to CPU time). 
+ +**NOTE** *The keywords “descendents” and “combined” in the output above +indicate a sum of the RSS used by multiple processes. It’s important to +keep in mind that on non-linux operating systems, this sum does not take +into account shared memory but rather adds up the total RSS of all +processes, which can lead to an overestimation. For Linux distributions, +however, pieces of shared memory are only counted once.* + +The ``Tracker`` can alternatively be used as a context manager rather +than explicitly calling ``start()`` and ``stop()``. .. code:: python3 @@ -52,9 +109,30 @@ manager rather than explicitly calling ``start()`` and ``stop()``. .. code:: none - Max RAM: 0.067 gigabytes - Max GPU RAM: 0.000 gigabytes - Compute time: 0.000 hours + Max RAM: + Unit: gigabytes + System capacity: 67.254 + System: 4.549 + Main: + Total RSS: 0.063 + Private RSS: 0.052 + Shared RSS: 0.011 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 0.063 + Private RSS: 0.052 + Shared RSS: 0.011 + Max GPU RAM: + Unit: gigabytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: hours + Time: 0.0 The units of the computational resources can be modified as desired. For @@ -71,43 +149,121 @@ the compute time in seconds: .. code:: none - Max RAM: 67.391 megabytes - Max GPU RAM: 0.000 kilobytes - Compute time: 1.035 seconds + Max RAM: + Unit: megabytes + System capacity: 67254.161 + System: 4548.833 + Main: + Total RSS: 63.279 + Private RSS: 52.064 + Shared RSS: 11.215 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 63.283 + Private RSS: 52.068 + Shared RSS: 11.215 + Max GPU RAM: + Unit: kilobytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: seconds + Time: 0.059 + + +The same information as the text format can be provided as a dictionary +via the ``to_json()`` method of the ``Tracker``. + +.. 
code:: python3 + + import json + print(json.dumps(tracker.to_json(), indent=1)) + + +.. code:: none + + { + "max_ram": { + "unit": "megabytes", + "system_capacity": 67254.161408, + "system": 4548.83328, + "main": { + "total_rss": 63.279104000000004, + "private_rss": 52.064256, + "shared_rss": 11.214848 + }, + "descendents": { + "total_rss": 0.0, + "private_rss": 0.0, + "shared_rss": 0.0 + }, + "combined": { + "total_rss": 63.283199999999994, + "private_rss": 52.068352, + "shared_rss": 11.214848 + } + }, + "max_gpu_ram": { + "unit": "kilobytes", + "main": 0.0, + "descendents": 0.0, + "combined": 0.0 + }, + "compute_time": { + "unit": "seconds", + "time": 0.058912038803100586 + } + } -The same information can be obtained in a dictionary via the -``Tracker``\ ’s ``to_json()`` method. +The ``Tracker`` class additionally has fields that provide the usage +information for each computational resource as python data classes. .. code:: python3 - tracker.to_json() + tracker.max_ram .. code:: none - {'max_ram': 67.391488, - 'ram_unit': 'megabytes', - 'max_gpu_ram': 0.0, - 'gpu_ram_unit': 'kilobytes', - 'compute_time': 1.0349853038787842, - 'time_unit': 'seconds'} + MaxRAM(unit='megabytes', system_capacity=67254.161408, system=4548.83328, main=RSSValues(total_rss=63.279104000000004, private_rss=52.064256, shared_rss=11.214848), descendents=RSSValues(total_rss=0.0, private_rss=0.0, shared_rss=0.0), combined=RSSValues(total_rss=63.283199999999994, private_rss=52.068352, shared_rss=11.214848)) + + + +.. code:: python3 + + trac + +.. code:: python3 + + tracker.max_gpu_ram + + + + +.. code:: none + MaxGPURAM(unit='kilobytes', main=0.0, descendents=0.0, combined=0.0) -Additionally, the individual measurements and units are available as -attributes in the ``Tracker`` class. .. code:: python3 - print(tracker.max_ram, tracker.ram_unit) + tracker.compute_time + + .. 
code:: none - 67.391488 megabytes + ComputeTime(unit='seconds', time=0.058912038803100586) + CLI @@ -128,7 +284,7 @@ help message. Tracks the computational resource usage (RAM, GPU RAM, and compute time) of a process corresponding to a given shell command. Usage: - gpu-tracker --execute= [--output=] [--format=] [--st=] [--ic] [--ru=] [--gru=] [--tu=] + gpu-tracker --execute= [--output=] [--format=] [--st=] [--ru=] [--gru=] [--tu=] Options: -h --help Show this help message. @@ -136,7 +292,6 @@ help message. -o --output= File path to store the computational-resource-usage measurements. If not set, prints measurements to the screen. -f --format= File format of the output. Either 'json' or 'text'. Defaults to 'text'. --st= The number of seconds to sleep in between usage-collection iterations. - --ic Stands for include-children; Whether to add the usage (RAM and GPU RAM) of child processes. Otherwise, only collects usage of the main process. --ru= One of 'bytes', 'kilobytes', 'megabytes', 'gigabytes', or 'terabytes'. --gru= One of 'bytes', 'kilobytes', 'megabytes', 'gigabytes', or 'terabytes'. --tu= One of 'seconds', 'minutes', 'hours', or 'days'. @@ -156,14 +311,34 @@ status code is reported. .. code:: none Resource tracking complete. Process completed with status code: 0 - Max RAM: 0.002 gigabytes - Max GPU RAM: 0.000 gigabytes - Compute time: 0.001 hours - - - -Like with the API, the units can be modified. For example, –tu stands -for time-unit and –ru stands for ram-unit. + Max RAM: + Unit: gigabytes + System capacity: 67.254 + System: 4.548 + Main: + Total RSS: 0.002 + Private RSS: 0.0 + Shared RSS: 0.002 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 0.002 + Private RSS: 0.0 + Shared RSS: 0.002 + Max GPU RAM: + Unit: gigabytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: hours + Time: 0.0 + + +The units of the computational resources can be modified. 
For example, +–tu stands for time-unit and –ru stands for ram-unit. .. code:: none @@ -173,10 +348,30 @@ for time-unit and –ru stands for ram-unit. .. code:: none Resource tracking complete. Process completed with status code: 0 - Max RAM: 1.966 megabytes - Max GPU RAM: 0.000 gigabytes - Compute time: 2.045 seconds - + Max RAM: + Unit: megabytes + System capacity: 67254.161 + System: 4550.529 + Main: + Total RSS: 1.831 + Private RSS: 0.135 + Shared RSS: 1.696 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 1.831 + Private RSS: 0.135 + Shared RSS: 1.696 + Max GPU RAM: + Unit: gigabytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: seconds + Time: 1.075 By default, the computational-resource-usage statistics are printed to @@ -200,10 +395,30 @@ that same content in a file. .. code:: none - Max RAM: 0.002 gigabytes - Max GPU RAM: 0.000 gigabytes - Compute time: 0.001 hours - + Max RAM: + Unit: gigabytes + System capacity: 67.254 + System: 4.567 + Main: + Total RSS: 0.002 + Private RSS: 0.0 + Shared RSS: 0.002 + Descendents: + Total RSS: 0.0 + Private RSS: 0.0 + Shared RSS: 0.0 + Combined: + Total RSS: 0.002 + Private RSS: 0.0 + Shared RSS: 0.002 + Max GPU RAM: + Unit: gigabytes + Main: 0.0 + Descendents: 0.0 + Combined: 0.0 + Compute time: + Unit: hours + Time: 0.0 By default, the format of the output is “text”. The ``-f`` or ``--format`` option can specify the format to be “json” instead. @@ -217,12 +432,36 @@ By default, the format of the output is “text”. The ``-f`` or Resource tracking complete. 
Process completed with status code: 0 { - "max_ram": 0.0019660800000000003, - "ram_unit": "gigabytes", - "max_gpu_ram": 0.0, - "gpu_ram_unit": "gigabytes", - "compute_time": 0.0005680449803670247, - "time_unit": "hours" + "max_ram": { + "unit": "gigabytes", + "system_capacity": 67.254161408, + "system": 4.582764544000001, + "main": { + "total_rss": 0.001662976, + "private_rss": 0.00013516800000000002, + "shared_rss": 0.001527808 + }, + "descendents": { + "total_rss": 0.0, + "private_rss": 0.0, + "shared_rss": 0.0 + }, + "combined": { + "total_rss": 0.001662976, + "private_rss": 0.00013516800000000002, + "shared_rss": 0.001527808 + } + }, + "max_gpu_ram": { + "unit": "gigabytes", + "main": 0.0, + "descendents": 0.0, + "combined": 0.0 + }, + "compute_time": { + "unit": "hours", + "time": 0.00030033310254414875 + } } @@ -244,10 +483,34 @@ By default, the format of the output is “text”. The ``-f`` or .. code:: none { - "max_ram": 0.0019660800000000003, - "ram_unit": "gigabytes", - "max_gpu_ram": 0.0, - "gpu_ram_unit": "gigabytes", - "compute_time": 0.0005686806970172458, - "time_unit": "hours" + "max_ram": { + "unit": "gigabytes", + "system_capacity": 67.254161408, + "system": 4.584312832, + "main": { + "total_rss": 0.0017162240000000001, + "private_rss": 0.00013516800000000002, + "shared_rss": 0.0015810560000000002 + }, + "descendents": { + "total_rss": 0.0, + "private_rss": 0.0, + "shared_rss": 0.0 + }, + "combined": { + "total_rss": 0.0017162240000000001, + "private_rss": 0.00013516800000000002, + "shared_rss": 0.0015810560000000002 + } + }, + "max_gpu_ram": { + "unit": "gigabytes", + "main": 0.0, + "descendents": 0.0, + "combined": 0.0 + }, + "compute_time": { + "unit": "hours", + "time": 0.0002998979224099053 + } } diff --git a/src/gpu_tracker/__main__.py b/src/gpu_tracker/__main__.py index 0220d26..cab6626 100644 --- a/src/gpu_tracker/__main__.py +++ b/src/gpu_tracker/__main__.py @@ -30,7 +30,7 @@ def main(): option_map = { '--st': 'sleep_time', '--ru': 
'ram_unit', - '--gu': 'gpu_unit', + '--gru': 'gpu_ram_unit', '--tu': 'time_unit' } kwargs = { @@ -51,10 +51,9 @@ def main(): process.wait() print(f'Resource tracking complete. Process completed with status code: {process.returncode}') if output_format == 'json': - # TODO just do JSON (no text) and add units to json object. output_str = json.dumps(tracker.to_json(), indent=1) elif output_format == 'text': - output_str = str(tracker) + '\n' + output_str = str(tracker) else: raise ValueError(f'"{output_format} is not a valid format. Valid values are "json" or "text".') if output is None: diff --git a/src/gpu_tracker/tracker.py b/src/gpu_tracker/tracker.py index e2895c4..6062341 100644 --- a/src/gpu_tracker/tracker.py +++ b/src/gpu_tracker/tracker.py @@ -12,37 +12,6 @@ import sys -@dclass.dataclass -class RSSValues: - total_rss: float = 0. - private_rss: float = 0. - shared_rss: float = 0. - - -@dclass.dataclass -class MaxRAM: - unit: str - system_capacity: float - system: float = 0. - main: RSSValues = dclass.field(default_factory=RSSValues) - descendents: RSSValues = dclass.field(default_factory=RSSValues) - combined: RSSValues = dclass.field(default_factory=RSSValues) - - -@dclass.dataclass -class MaxGPURAM: - unit: str - main: float = 0. - descendents: float = 0. - combined: float = 0. - - -@dclass.dataclass -class ComputeTime: - unit: str - time: float = 0. - - class Tracker: """ Runs a thread in the background that tracks the compute time, maximum RAM, and maximum GPU RAM usage within a context manager or explicit ``start()`` and ``stop()`` methods. @@ -256,7 +225,7 @@ def _format_float(dictionary: dict): elif type(value) == dict: Tracker._format_float(value) - def to_json(self): + def to_json(self) -> dict[str, dict]: """ Constructs a dictionary of the computational-resource-usage measurements and their units. 
""" @@ -267,6 +236,60 @@ def to_json(self): } +@dclass.dataclass +class RSSValues: + """ + :param total_rss: The sum of ``private_rss`` and ``shared_rss``. + :param private_rss: The RAM usage exclusive to a process. + :param shared_rss: The RAM usage of a process shared with at least one other process. + """ + total_rss: float = 0. + private_rss: float = 0. + shared_rss: float = 0. + + +@dclass.dataclass +class MaxRAM: + """ + :param unit: The unit of measurement for RAM e.g. gigabytes. + :param system_capacity: A constant value for the RAM capacity of the entire operating system. + :param system: The RAM usage across the entire operating system. + :param main: The RAM usage of the main process. + :param descendents: The summed RAM usage of the descendent processes (i.e. child processes, grandchild processes, etc.). + :param combined: The summed RAM usage of both the main process and any descendent processes it may have. + """ + unit: str + system_capacity: float + system: float = 0. + main: RSSValues = dclass.field(default_factory=RSSValues) + descendents: RSSValues = dclass.field(default_factory=RSSValues) + combined: RSSValues = dclass.field(default_factory=RSSValues) + + +@dclass.dataclass +class MaxGPURAM: + """ + :param unit: The unit of measurement for GPU RAM e.g. gigabytes. + :param main: The GPU RAM usage of the main process. + :param descendents: The summed GPU RAM usage of the descendent processes (i.e. child processes, grandchild processes, etc.). + :param combined: The summed GPU RAM usage of both the main process and any descendent processes it may have. + """ + unit: str + main: float = 0. + descendents: float = 0. + combined: float = 0. + + +@dclass.dataclass +class ComputeTime: + """ + :param unit: The unit of measurement for compute time e.g. hours. + :param time: The real compute time. + """ + unit: str + time: float = 0. 
+ + def _testable_sleep(sleep_time: float): """ The time.sleep() function causes issues when mocked in tests, so we create this wrapper that can be safely mocked.