diff --git a/01_ssh_docker_configuration/launch_instrumentation.py b/01_ssh_docker_configuration/launch_instrumentation.py index 0c26be5..39c68e9 100644 --- a/01_ssh_docker_configuration/launch_instrumentation.py +++ b/01_ssh_docker_configuration/launch_instrumentation.py @@ -1,29 +1,24 @@ -def launch(lr, model_name="LeNet"): - print(f"training model {model_name} with {lr}") - print('...') - print('This is working!!') +def train_fn(seed=None): + from time import sleep + from ml_logger import logger - print('now try to import ml-logger') - from ml_logger import logger, RUN - print('import succeeded') + logger.print('this is running') + logger.print(f"The exp seed is: {seed}", color="green") - print(logger) + logger.print('This is sleeping...', color="yellow") + sleep(5) + logger.print('done!') - print('now inspec the RUN object: RUN', vars(RUN)) - assert RUN.prefix == "set_from_outside" - assert RUN.job_name == "ml-logger-test-job" - -if __name__ == "__main__": +if __name__ == '__main__': import jaynes - from ml_logger import RUN, instr + from ml_logger import logger, instr - RUN.prefix = "set_from_outside" - # need to set the job name too - RUN.job_name = "ml-logger-test-job" jaynes.config() - thunk = instr(launch) - jaynes.run(thunk, lr=1e-3) - # this line allows you to keep the pipe open and hear back from the remote instance. - jaynes.listen(200) + for i in range(5): + thunk = instr(train_fn) + jaynes.add(thunk, seed=i * 100) + + jaynes.execute() + jaynes.listen() diff --git a/docker/jaynes/Makefile b/docker/jaynes/Makefile index f1fbe67..3c6bde4 100644 --- a/docker/jaynes/Makefile +++ b/docker/jaynes/Makefile @@ -14,3 +14,5 @@ test-aws: release: docker image tag jaynes episodeyang/jaynes:$(version) docker push episodeyang/jaynes:$(version) + docker image tag jaynes episodeyang/jaynes:latest + docker push episodeyang/jaynes:latest diff --git a/docker/pytorch/Dockerfile b/docker/pytorch/Dockerfile index 02cd491..f5fc52c 100644 --- a/docker/pytorch/Dockerfile +++ b/docker/pytorch/Dockerfile @@ -1,4 +1,4 @@ -FROM nvidia/cudagl:11.3.0-devel-ubuntu18.04 +FROM nvidia/cudagl:11.6.0-devel-ubuntu20.04 FROM python:3.8 ENV PIP_NO_CACHE_DIR=1 @@ -28,6 +28,6 @@ RUN yes | apt install \ RUN yes | apt install libcgal-qt5-dev RUN pip install --upgrade pip RUN pip install pytest pytest-forked lz4 pyyaml -RUN pip install torch==1.8.1+cu111 torchvision==0.9.1+cu111 torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html +RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 RUN pip install datasets einops sklearn torchinfo tqdm RUN pip install jaynes ml-logger waterbear params-proto functional-notations \ No newline at end of file diff --git a/docker/pytorch/Makefile b/docker/pytorch/Makefile index 37b08df..4a3431b 100644 --- a/docker/pytorch/Makefile +++ b/docker/pytorch/Makefile @@ -9,3 +9,5 @@ run: release: docker tag pytorch episodeyang/pytorch:$(version) docker push episodeyang/pytorch:$(version) + docker tag pytorch episodeyang/pytorch:latest + docker push episodeyang/pytorch:latest