From ecbd7fc2452029113072a7991f0d0143595ba80d Mon Sep 17 00:00:00 2001 From: Moritz Reiber Date: Wed, 25 Oct 2023 14:49:35 +0000 Subject: [PATCH] Update NAS experiments --- .../embedded_vision_net_ri/config.yaml | 43 ++++++++ experiments/embedded_vision_net_ri/eval.yaml | 69 ++++++++++++ .../experiment/ae_nas_cifar10.yaml | 21 ++++ .../experiment/ae_nas_cifar10_v2.yaml | 21 ++++ .../experiment/ae_nas_ri.yaml | 18 +++ .../experiment/random_nas.yaml | 17 +++ .../experiment/random_nas_cifar10.yaml} | 11 +- .../experiment/random_nas_cifar10_nopool.yaml | 23 ++++ .../hydra/launcher/ml_cloud_4gpu.yaml | 26 +++++ .../result_exploration.ipynb | 104 ++++++++++++++++++ .../scripts/experiment_slurm.sh | 65 +++++++++++ .../scripts/run-ae_nas-slurm-cifar10.sh | 62 +++++++++++ .../scripts/run-ae_nas-slurm-ri.sh | 62 +++++++++++ .../scripts/run-random_nas-slurm-cifar10.sh | 62 +++++++++++ .../scripts/run-random_nas-slurm.sh | 62 +++++++++++ .../scripts/train_all_baselines.sh | 31 ++++++ .../scripts/train_all_baselines_submitit.sh | 29 +++++ hannah/conf/nas/aging_evolution_nas.yaml | 6 +- hannah/conf/nas/sampler/aging_evolution.yaml | 2 +- hannah/models/embedded_vision_net/models.py | 9 +- hannah/nas/eval/__main__.py | 2 +- hannah/nas/eval/extract.py | 2 +- hannah/nas/eval/prepare.py | 2 +- hannah/nas/search/sampler/aging_evolution.py | 3 +- 24 files changed, 735 insertions(+), 17 deletions(-) create mode 100644 experiments/embedded_vision_net_ri/config.yaml create mode 100644 experiments/embedded_vision_net_ri/eval.yaml create mode 100644 experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml create mode 100644 experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10_v2.yaml create mode 100644 experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml create mode 100644 experiments/embedded_vision_net_ri/experiment/random_nas.yaml rename experiments/{embedded_vision_net/embedded_vision_net_ri/experiment/random_nas.yaml => 
embedded_vision_net_ri/experiment/random_nas_cifar10.yaml} (53%) create mode 100644 experiments/embedded_vision_net_ri/experiment/random_nas_cifar10_nopool.yaml create mode 100644 experiments/embedded_vision_net_ri/hydra/launcher/ml_cloud_4gpu.yaml create mode 100644 experiments/embedded_vision_net_ri/result_exploration.ipynb create mode 100755 experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-cifar10.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-ri.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm-cifar10.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/train_all_baselines.sh create mode 100755 experiments/embedded_vision_net_ri/scripts/train_all_baselines_submitit.sh diff --git a/experiments/embedded_vision_net_ri/config.yaml b/experiments/embedded_vision_net_ri/config.yaml new file mode 100644 index 00000000..5f7f022c --- /dev/null +++ b/experiments/embedded_vision_net_ri/config.yaml @@ -0,0 +1,43 @@ +## +## Copyright (c) 2022 University of Tübingen. +## +## This file is part of hannah. +## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## +defaults: + - base_config + - experiment: optional + - override dataset: ri_capsule # Dataset configuration name + - override features: identity # Feature extractor configuration name (use identity for vision datasets) + #- override model: timm_mobilenetv3_small_075 # Neural network name (for now timm_resnet50 or timm_efficientnet_lite1) + - override scheduler: 1cycle # learning rate scheduler config name + - override optimizer: adamw # Optimizer config name + - override normalizer: null # Feature normalizer (used for quantized neural networks) + - override module: image_classifier # Lightning module config for the training loop (image classifier for image classification tasks) + - _self_ + + +dataset: + data_folder: ${oc.env:HANNAH_DATA_FOLDER,${hydra:runtime.cwd}/../../datasets/} + +module: + batch_size: 16 + num_workers: 8 + +trainer: + max_epochs: 10 + +scheduler: + max_lr: 0.001 diff --git a/experiments/embedded_vision_net_ri/eval.yaml b/experiments/embedded_vision_net_ri/eval.yaml new file mode 100644 index 00000000..ff931120 --- /dev/null +++ b/experiments/embedded_vision_net_ri/eval.yaml @@ -0,0 +1,69 @@ +## +## Copyright (c) 2022 University of Tübingen. +## +## This file is part of hannah. +## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## +data: + AE: trained_models/ae_nas_cifar10/embedded_vision_net + RANDOM: trained_models/random_nas_cifar10_workingbn/embedded_vision_net + +metrics: + total_act: + name: Activations + total_weights: + name: Weights + weights_m: + name: Weights [M] + derived: data["total_weights"] / 1000 / 1000 + val_accuracy: + name: Accuracy [%] + derived: (1.0 - data["val_error"]) * 100.0 + act_k: + name: Activations [k] + derived: data["total_act"] / 1000 + macs_m: + name: MACS [M] + derived: data["total_macs"] / 1000 / 1000 + +plots: + # Comparison plots 2-3 metrics using y, x and size as visualization points + - type: comparison + name: accuracy_memory + metrics: + - val_accuracy + - weights_m + - act_k + + - type: comparison + name: accuracy_macs + metrics: + - val_accuracy + - macs_m + +extract: + AE: + bounds: + val_error: 0.20 + total_macs: 100000000 + total_weights: 1000000 + + +experiment: embedded_vision_net_ri +force: false + +hydra: + run: + dir: ./nas_results/${experiment} diff --git a/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml b/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml new file mode 100644 index 00000000..3b31f914 --- /dev/null +++ b/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10.yaml @@ -0,0 +1,21 @@ +# @package _global_ +defaults: + - override /nas: aging_evolution_nas + - override /model: embedded_vision_nas + - override /dataset: cifar10 + +model: + num_classes: 10 +module: + batch_size: 128 +nas: + budget: 300 + n_jobs: 8 + + +trainer: + max_epochs: 10 + +seed: [1234] + +experiment_id: "ae_nas_cifar10" diff --git a/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10_v2.yaml b/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10_v2.yaml new file mode 100644 index 00000000..ac5ec9fa --- /dev/null +++ b/experiments/embedded_vision_net_ri/experiment/ae_nas_cifar10_v2.yaml @@ -0,0 +1,21 @@ +# @package _global_ +defaults: + - override /nas: aging_evolution_nas + - override /model: 
embedded_vision_nas + - override /dataset: cifar10 + +model: + num_classes: 10 +module: + batch_size: 128 +nas: + budget: 600 + n_jobs: 8 + + +trainer: + max_epochs: 10 + +seed: [1234] + +experiment_id: "ae_nas_cifar10_v2" diff --git a/experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml b/experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml new file mode 100644 index 00000000..d3f40413 --- /dev/null +++ b/experiments/embedded_vision_net_ri/experiment/ae_nas_ri.yaml @@ -0,0 +1,18 @@ +# @package _global_ +defaults: + - override /nas: aging_evolution_nas + - override /model: embedded_vision_net + + +nas: + budget: 300 + n_jobs: 8 + presample: False + + +trainer: + max_epochs: 10 + +seed: [1234] + +experiment_id: "ae_nas_ri" diff --git a/experiments/embedded_vision_net_ri/experiment/random_nas.yaml b/experiments/embedded_vision_net_ri/experiment/random_nas.yaml new file mode 100644 index 00000000..a1fd0b08 --- /dev/null +++ b/experiments/embedded_vision_net_ri/experiment/random_nas.yaml @@ -0,0 +1,17 @@ +# @package _global_ +defaults: + - override /nas: random_nas + - override /model: embedded_vision_net + + +nas: + budget: 300 + n_jobs: 8 + + +trainer: + max_epochs: 10 + +seed: [1234] + +experiment_id: "random_nas" diff --git a/experiments/embedded_vision_net/embedded_vision_net_ri/experiment/random_nas.yaml b/experiments/embedded_vision_net_ri/experiment/random_nas_cifar10.yaml similarity index 53% rename from experiments/embedded_vision_net/embedded_vision_net_ri/experiment/random_nas.yaml rename to experiments/embedded_vision_net_ri/experiment/random_nas_cifar10.yaml index f7fca962..ef33c5b6 100644 --- a/experiments/embedded_vision_net/embedded_vision_net_ri/experiment/random_nas.yaml +++ b/experiments/embedded_vision_net_ri/experiment/random_nas_cifar10.yaml @@ -2,12 +2,15 @@ defaults: - override /nas: random_nas - override /model: embedded_vision_nas + - override /dataset: cifar10 - +model: + num_classes: 10 +module: + batch_size: 128 nas: - 
budget: 100 + budget: 300 n_jobs: 8 - presample: False trainer: @@ -15,4 +18,4 @@ trainer: seed: [1234] -experiment_id: "random_nas_log_saving" +experiment_id: "random_nas_cifar10_workingbn" diff --git a/experiments/embedded_vision_net_ri/experiment/random_nas_cifar10_nopool.yaml b/experiments/embedded_vision_net_ri/experiment/random_nas_cifar10_nopool.yaml new file mode 100644 index 00000000..3d642938 --- /dev/null +++ b/experiments/embedded_vision_net_ri/experiment/random_nas_cifar10_nopool.yaml @@ -0,0 +1,23 @@ +# @package _global_ +defaults: + - override /nas: random_nas + - override /model: embedded_vision_nas + - override /dataset: cifar10 + +model: + num_classes: 10 +module: + batch_size: 128 +nas: + budget: 300 + n_jobs: 8 + predictor: + model: + input_feature_size: 30 + +trainer: + max_epochs: 10 + +seed: [1234] + +experiment_id: "random_nas_cifar10_nopool" diff --git a/experiments/embedded_vision_net_ri/hydra/launcher/ml_cloud_4gpu.yaml b/experiments/embedded_vision_net_ri/hydra/launcher/ml_cloud_4gpu.yaml new file mode 100644 index 00000000..940c9d6b --- /dev/null +++ b/experiments/embedded_vision_net_ri/hydra/launcher/ml_cloud_4gpu.yaml @@ -0,0 +1,26 @@ +## +## Copyright (c) 2022 University of Tübingen. +## +## This file is part of hannah. +## See https://atreus.informatik.uni-tuebingen.de/ties/ai/hannah/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## +defaults: + - submitit_slurm + - _self_ + +timeout_min: 3600 +gpus_per_task: 4 +cpus_per_gpu: 8 +partition: gpu-2080ti diff --git a/experiments/embedded_vision_net_ri/result_exploration.ipynb b/experiments/embedded_vision_net_ri/result_exploration.ipynb new file mode 100644 index 00000000..b29acd67 --- /dev/null +++ b/experiments/embedded_vision_net_ri/result_exploration.ipynb @@ -0,0 +1,104 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import pickle as pkl\n", + "from pathlib import Path\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "metrics = pd.read_pickle(Path(\"metrics.pkl\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "def minseries(series):\n", + " s = []\n", + " for i in series:\n", + " if s:\n", + " s.append(min(s[-1], i))\n", + " else:\n", + " s.append(i)\n", + " return s" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "metrics_ae = metrics[metrics['Task'] == \"AE\"]\n", + "metrics_rn = metrics[metrics['Task'] == \"RANDOM\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(minseries(metrics_rn[\"val_error\"]), label=\"RN\")\n", + "plt.plot(minseries(metrics_ae[\"val_error\"]), label=\"AE\")\n", + "plt.axvline(20, color=\"grey\", linestyle=\"--\")\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "hannah-rvIoGOA8-py3.10", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh b/experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh new file mode 100755 index 00000000..b4bd63e9 --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/experiment_slurm.sh @@ -0,0 +1,65 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## + + +#SBATCH --job-name=ri_random_nas + +#resources: + +#SBATCH --partition=gpu-2080ti +# the slurm partition the job is queued to. +# FIXME: test if preemptable is available + +#SBATCH --nodes=1 +# requests that the cores are all on one node + +#SBATCH --gres=gpu:rtx2080ti:2 +#the job can use and see 2 GPUs (8 GPUs are available in total on one node) + +#SBATCH --time=4320 +# the maximum time the script needs to run (4320 minutes = 72 hours) + +#SBATCH --error=jobs/%j.err +# write the error output to jobs/*jobID*.err + +#SBATCH --output=jobs/%j.out +# write the standard output to jobs/*jobID*.out + +#SBATCH --mail-type=ALL +#write a mail if a job begins, ends, fails, gets requeued or stages out + +#SBATCH --mail-user=christoph.gerum@uni-tuebingen.de +# your mail address + + +#Script +echo "Job information" +scontrol show job $SLURM_JOB_ID + + +export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ + +GPUS=2 +BATCH_SIZE=32 + +# trainer=sharded + +hannah-train +experiment=$1 model=embedded_vision_net trainer.gpus=${GPUS} module.batch_size=${BATCH_SIZE} +module.num_workers=16 diff --git a/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-cifar10.sh b/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-cifar10.sh new file mode 100755 index 00000000..f8c72ced --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-cifar10.sh @@ -0,0 +1,62 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. 
+## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## + + +#SBATCH --job-name=run-random_nas + +#resources: + +#SBATCH --partition=gpu-2080ti +# the slurm partition the job is queued to. +# FIXME: test if preemptable is available + +#SBATCH --nodes=1 +# requests that the cores are all on one node + +#SBATCH --gres=gpu:rtx2080ti:8 +#the job can use and see 8 GPUs (8 GPUs are available in total on one node) + +#SBATCH --time=4320 +# the maximum time the script needs to run (4320 minutes = 72 hours) + +#SBATCH --error=jobs/%j.err +# write the error output to jobs/*jobID*.err + +#SBATCH --output=jobs/%j.out +# write the standard output to jobs/*jobID*.out + +#SBATCH --mail-type=ALL +#write a mail if a job begins, ends, fails, gets requeued or stages out + +#SBATCH --mail-user=moritz.reiber@uni-tuebingen.de +# your mail address + + +#Script +echo "Job information" +scontrol show job $SLURM_JOB_ID + + + +# export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ +conda activate hannah + + +hannah-train trainer.gpus=8 experiment=ae_nas_cifar10_v2 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.n_jobs=8 fx_mac_summary=True ~normalizer diff --git a/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-ri.sh b/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-ri.sh new file mode 100755 index 00000000..32786e1e --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/run-ae_nas-slurm-ri.sh @@ -0,0 +1,62 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. 
+## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## + + +#SBATCH --job-name=run-random_nas + +#resources: + +#SBATCH --partition=gpu-2080ti +# the slurm partition the job is queued to. +# FIXME: test if preemptable is available + +#SBATCH --nodes=1 +# requests that the cores are all on one node + +#SBATCH --gres=gpu:rtx2080ti:8 +#the job can use and see 8 GPUs (8 GPUs are available in total on one node) + +#SBATCH --time=4320 +# the maximum time the script needs to run (4320 minutes = 72 hours) + +#SBATCH --error=jobs/%j.err +# write the error output to jobs/*jobID*.err + +#SBATCH --output=jobs/%j.out +# write the standard output to jobs/*jobID*.out + +#SBATCH --mail-type=ALL +#write a mail if a job begins, ends, fails, gets requeued or stages out + +#SBATCH --mail-user=moritz.reiber@uni-tuebingen.de +# your mail address + + +#Script +echo "Job information" +scontrol show job $SLURM_JOB_ID + + + +export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ +conda activate hannah + + +hannah-train trainer.gpus=8 experiment=ae_nas_ri model=embedded_vision_net model.num_classes=4 nas.n_jobs=8 fx_mac_summary=True ~normalizer diff --git a/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm-cifar10.sh b/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm-cifar10.sh new file mode 100755 index 00000000..e8008d65 --- /dev/null +++ 
b/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm-cifar10.sh @@ -0,0 +1,62 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## + + +#SBATCH --job-name=run-random_nas + +#resources: + +#SBATCH --partition=gpu-2080ti +# the slurm partition the job is queued to. +# FIXME: test if preemptable is available + +#SBATCH --nodes=1 +# requests that the cores are all on one node + +#SBATCH --gres=gpu:rtx2080ti:8 +#the job can use and see 8 GPUs (8 GPUs are available in total on one node) + +#SBATCH --time=4320 +# the maximum time the script needs to run (4320 minutes = 72 hours) + +#SBATCH --error=jobs/%j.err +# write the error output to jobs/*jobID*.err + +#SBATCH --output=jobs/%j.out +# write the standard output to jobs/*jobID*.out + +#SBATCH --mail-type=ALL +#write a mail if a job begins, ends, fails, gets requeued or stages out + +#SBATCH --mail-user=moritz.reiber@uni-tuebingen.de +# your mail address + + +#Script +echo "Job information" +scontrol show job $SLURM_JOB_ID + + + +# export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ +conda activate hannah + + +hannah-train trainer.gpus=8 experiment=random_nas_cifar10 model=embedded_vision_net dataset=cifar10 model.num_classes=10 nas.n_jobs=8 fx_mac_summary=True ~normalizer diff --git 
a/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm.sh b/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm.sh new file mode 100755 index 00000000..87a98b8f --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/run-random_nas-slurm.sh @@ -0,0 +1,62 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. +## + + +#SBATCH --job-name=run-random_nas + +#resources: + +#SBATCH --partition=gpu-2080ti +# the slurm partition the job is queued to. 
+# FIXME: test if preemptable is available + +#SBATCH --nodes=1 +# requests that the cores are all on one node + +#SBATCH --gres=gpu:rtx2080ti:8 +#the job can use and see 8 GPUs (8 GPUs are available in total on one node) + +#SBATCH --time=4320 +# the maximum time the script needs to run (4320 minutes = 72 hours) + +#SBATCH --error=jobs/%j.err +# write the error output to jobs/*jobID*.err + +#SBATCH --output=jobs/%j.out +# write the standard output to jobs/*jobID*.out + +#SBATCH --mail-type=ALL +#write a mail if a job begins, ends, fails, gets requeued or stages out + +#SBATCH --mail-user=christoph.gerum@uni-tuebingen.de +# your mail address + + +#Script +echo "Job information" +scontrol show job $SLURM_JOB_ID + + + +export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ +conda activate hannah + + +hannah-train trainer.gpus=8 module.batch_size=16 experiment=random_nas model=embedded_vision_net nas.n_jobs=8 fx_mac_summary=True ~normalizer diff --git a/experiments/embedded_vision_net_ri/scripts/train_all_baselines.sh b/experiments/embedded_vision_net_ri/scripts/train_all_baselines.sh new file mode 100755 index 00000000..19153763 --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/train_all_baselines.sh @@ -0,0 +1,31 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+## See the License for the specific language governing permissions and +## limitations under the License. +## + +EXPERIMENT="baseline" +MODELS="timm_resnet18 timm_resnet50 timm_resnet152 timm_efficientnet_lite1 timm_mobilenetv3_small_100 timm_mobilenetv3_small_075 timm_mobilenetv3_large_100 timm_focalnet_base_srf" + +export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ + +SBATCH="" +#SBATCH="sbatch --job-name=baseline" + +for MODEL in ${MODELS}; do + sbatch --job-name=$MODEL scripts/train_baselines_slurm.sh -m $MODEL +done diff --git a/experiments/embedded_vision_net_ri/scripts/train_all_baselines_submitit.sh b/experiments/embedded_vision_net_ri/scripts/train_all_baselines_submitit.sh new file mode 100755 index 00000000..14f566be --- /dev/null +++ b/experiments/embedded_vision_net_ri/scripts/train_all_baselines_submitit.sh @@ -0,0 +1,29 @@ +#!/bin/bash +## +## Copyright (c) 2023 Hannah contributors. +## +## This file is part of hannah. +## See https://github.com/ekut-es/hannah for further info. +## +## Licensed under the Apache License, Version 2.0 (the "License"); +## you may not use this file except in compliance with the License. +## You may obtain a copy of the License at +## +## http://www.apache.org/licenses/LICENSE-2.0 +## +## Unless required by applicable law or agreed to in writing, software +## distributed under the License is distributed on an "AS IS" BASIS, +## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +## See the License for the specific language governing permissions and +## limitations under the License. 
+## + +EXPERIMENT="baseline" +MODEL="timm_resnet18,timm_resnet50,timm_resnet152,timm_efficientnet_lite1,timm_mobilenetv3_small_100,timm_mobilenetv3_small_075,timm_mobilenetv3_large_100" + +export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/ + +hannah-train experiment_id=$EXPERIMENT model=$MODEL hydra/launcher=ml_cloud_4gpu \ + hydra.sweep.dir='${output_dir}/${experiment_id}/' hydra.sweep.subdir='${model.name}' \ + module.num_workers=8 module.batch_size=16 trainer.gpus=4 trainer=sharded \ + -m diff --git a/hannah/conf/nas/aging_evolution_nas.yaml b/hannah/conf/nas/aging_evolution_nas.yaml index 897bd889..e71d9844 100644 --- a/hannah/conf/nas/aging_evolution_nas.yaml +++ b/hannah/conf/nas/aging_evolution_nas.yaml @@ -25,10 +25,10 @@ defaults: _target_: hannah.nas.search.search.DirectNAS budget: 2000 n_jobs: 10 -presample: False +presample: True total_candidates: 100 -num_selected_candidates: 10 +num_selected_candidates: 20 bounds: - val_error: 0.06 + val_error: 0.12 total_macs: 128000000 total_weights: 500000 diff --git a/hannah/conf/nas/sampler/aging_evolution.yaml b/hannah/conf/nas/sampler/aging_evolution.yaml index f0cacb21..761b04e4 100644 --- a/hannah/conf/nas/sampler/aging_evolution.yaml +++ b/hannah/conf/nas/sampler/aging_evolution.yaml @@ -1,3 +1,3 @@ _target_: hannah.nas.search.sampler.aging_evolution.AgingEvolutionSampler bounds: {} -population_size: 100 \ No newline at end of file +population_size: 20 \ No newline at end of file diff --git a/hannah/models/embedded_vision_net/models.py b/hannah/models/embedded_vision_net/models.py index d628b371..f6d1b761 100644 --- a/hannah/models/embedded_vision_net/models.py +++ b/hannah/models/embedded_vision_net/models.py @@ -114,15 +114,14 @@ def stem(input, kernel_size, stride, out_channels): @scope def classifier_head(input, num_classes): - out = adaptive_avg_pooling(input) - # out = input + out = choice(input, adaptive_avg_pooling) out = linear(out, num_classes) return out def search_space(name, 
input, num_classes=10): - out_channels = IntScalarParameter(4, 64, name='out_channels') - kernel_size = CategoricalParameter([1, 3, 5], name='kernel_size') + out_channels = IntScalarParameter(32, 256, step_size=4, name='out_channels') + kernel_size = CategoricalParameter([3, 5, 7, 9], name='kernel_size') stride = CategoricalParameter([1, 2], name='stride') expand_ratio = IntScalarParameter(1, 6, name='expand_ratio') reduce_ratio = IntScalarParameter(1, 6, name='reduce_ratio') @@ -139,7 +138,7 @@ def search_space(name, input, num_classes=10): exits.append(out) out = dynamic_depth(*exits, switch=num_blocks) - out = classifier_head(out, num_classes=num_classes) # FIXME: Configure num_classes automatically + out = classifier_head(out, num_classes=num_classes) strides = [v for k, v in out.parametrization(flatten=True).items() if k.split('.')[-1] == 'stride'] total_stride = expr_product(strides) diff --git a/hannah/nas/eval/__main__.py b/hannah/nas/eval/__main__.py index f9d78e47..33631931 100644 --- a/hannah/nas/eval/__main__.py +++ b/hannah/nas/eval/__main__.py @@ -36,7 +36,7 @@ def main(config): logger.info("Current working directory %s", os.getcwd()) result_metrics, parameters = prepare_summary( config.data, - base_path=hydra.utils.get_original_cwd(), + base_dir=hydra.utils.get_original_cwd(), force=config.get("force", False), ) diff --git a/hannah/nas/eval/extract.py b/hannah/nas/eval/extract.py index 2b8ae3ca..d6d9884f 100644 --- a/hannah/nas/eval/extract.py +++ b/hannah/nas/eval/extract.py @@ -22,7 +22,7 @@ import pandas as pd import yaml from genericpath import exists -from hannah_optimizer.utils import is_pareto +from hannah.nas.utils import is_pareto logger = logging.getLogger("nas_eval.extract") diff --git a/hannah/nas/eval/prepare.py b/hannah/nas/eval/prepare.py index f5b4a80c..ff55461b 100644 --- a/hannah/nas/eval/prepare.py +++ b/hannah/nas/eval/prepare.py @@ -65,7 +65,7 @@ def prepare_summary( parameters_all = {} for name, source in data.items(): 
logger.info(" Extracting design points for task: %s", name) - history_path = base_path / source / "history.pkl" + history_path = base_path / source / "history.yml" if history_path.suffix == ".yml": with history_path.open("r") as yaml_file: diff --git a/hannah/nas/search/sampler/aging_evolution.py b/hannah/nas/search/sampler/aging_evolution.py index 7cf10314..1096e515 100644 --- a/hannah/nas/search/sampler/aging_evolution.py +++ b/hannah/nas/search/sampler/aging_evolution.py @@ -28,6 +28,7 @@ from hannah.nas.parameters.parameters import CategoricalParameter, FloatScalarParameter, IntScalarParameter from hannah.nas.parameters.parametrize import set_parametrization from hannah.nas.search.sampler.mutator import ParameterMutator +from hannah.nas.search.utils import np_to_primitive from ...parametrization import SearchSpace from ...utils import is_pareto @@ -122,7 +123,7 @@ def next_parameters(self): def tell_result(self, parameters, metrics): "Tell the result of a task" - + parameters = np_to_primitive(parameters) result = SearchResult(len(self.history), parameters, metrics) self.history.append(result)