Skip to content

Commit

Permalink
Add training script
Browse files Browse the repository at this point in the history
  • Loading branch information
cgerum committed Nov 17, 2023
1 parent 69f9f6e commit 371645b
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 0 deletions.
60 changes: 60 additions & 0 deletions experiments/rhode_island/scripts/train_slurm.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/bin/bash
##
## Copyright (c) 2023 Hannah contributors.
##
## This file is part of hannah.
## See https://github.com/ekut-es/hannah for further info.
##
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##

#SBATCH --job-name=rhode_island_baseline

#resources:

#SBATCH --partition=gpu-2080ti
# the slurm partition the job is queued to.
# FIXME: test if a preemptable partition is available

#SBATCH --nodes=1
# requests that the cores are all on one node

#SBATCH --gres=gpu:rtx2080ti:1
# the job can use and see 1 GPU of type rtx2080ti (8 GPUs are available in total on one node)

#SBATCH --time=4:00:00
# the maximum time the script needs to run (4 hours)

#SBATCH --error=jobs/%j.err
# write the error output to jobs/*jobID*.err, relative to the directory the job
# was submitted from (the jobs/ directory must already exist)

#SBATCH --output=jobs/%j.out
# write the standard output to jobs/*jobID*.out, relative to the directory the
# job was submitted from

#SBATCH --mail-type=ALL
#write a mail if a job begins, ends, fails, gets requeued or stages out

#SBATCH [email protected]
# your mail address


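# Send signal SIGUSR1 90 seconds before training ends.
# NOTE: the directive below is currently inactive, because sbatch only honours
# lines that start with "#SBATCH" (no space after the "#").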
# SBATCH --signal=SIGUSR1@90

# Script
# Print the scheduler's record of this job so the allocation details end up in
# the job's output file.
echo "Job information"
scontrol show job "$SLURM_JOB_ID"

# Root folder of the datasets; exported so that child processes see it too.
export HANNAH_DATA_FOLDER=/mnt/qb/datasets/STAGING/bringmann/datasets/

# Launch the baseline training run as a job step. The expansion is quoted so
# the path is always passed as a single argument, even if it is later changed
# to one containing whitespace or glob characters.
srun hannah-train experiment_id=baseline dataset.data_folder="$HANNAH_DATA_FOLDER" module.num_workers=8
5 changes: 5 additions & 0 deletions scripts/ml_cloud_ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ srun --job-name update_env --mincpus 4 --time 01:00:00 poetry install -E vision

# Submit the training job of every experiment from inside that experiment's
# directory, because each job script is addressed by a path relative to it.
# The subshell leaves the caller's working directory untouched, and sbatch is
# only invoked when the directory could actually be entered.
for experiment in cifar10 rhode_island; do
  (
    cd "experiments/${experiment}" && sbatch scripts/train_slurm.sh
  )
done

0 comments on commit 371645b

Please sign in to comment.