forked from vlimant/mpi_opt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhyperparameter_search_option0.py
117 lines (102 loc) · 4.05 KB
/
hyperparameter_search_option0.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from skopt import Optimizer
import subprocess
import time
import mpiLAPI
import numpy as np
import json
import os
def dummy_model(params, delay=1):
    """Stand-in objective: wait briefly, then return a random integer score.

    Lets the optimisation loop be exercised without launching a real
    training job.

    Args:
        params: Suggested hyperparameters. Accepted so the signature matches
            the real objective, but not used.
        delay: Seconds to sleep before answering. Defaults to 1, the value
            that used to be hard-coded; pass 0 for an immediate answer.

    Returns:
        A random integer between 0 and 10, both ends included.
    """
    import random

    if delay > 0:
        time.sleep(delay)
    return random.randint(0, 10)
def train_model_dummy(params):
    """Run a fixed one-process MPI training job and return its final validation loss.

    The command line is built from constants (one process, one epoch, the
    MNIST architecture and file lists), executed through the shell, and its
    standard output is scanned for a line starting with
    "Wrote trial information". The last whitespace-separated token of that
    line is opened as a JSON file and the last entry of
    ``history["0"]["val_loss"]`` is returned.

    Args:
        params: Suggested hyperparameters. Currently ignored.

    Returns:
        The final validation loss as a float, or ``np.inf`` when the command
        exits with a non-zero status or no result line is found (the same
        sentinel ``train_model`` uses for a failed trial).
    """
    n_nodes = "1"
    model_arch_files = "mnist_arch.json"
    train_files = "train_mnist.list"
    test_files = "test_mnist.list"
    n_epochs = "1"
    # TODO: build the model from `params` and hand its JSON to the driver;
    # until then every call trains the same fixed architecture.
    mpi_cmd = "mpirun"
    train_cmd = (
        mpi_cmd + " -np " + n_nodes
        + " python ./mpi_learn/MPIDriver.py "
        + model_arch_files + " " + train_files + " " + test_files
        + " --loss categorical_crossentropy --tf --epochs " + n_epochs
    )

    print("starting MPI process")
    print(train_cmd)
    time.sleep(1)  # don't overload the cluster with mpi_run calls
    try:
        # universal_newlines=True yields text instead of bytes, so the
        # string operations below work on Python 3 as well as Python 2.
        # shell=True is kept: the command is assembled from constants only.
        output = subprocess.check_output(
            train_cmd, shell=True, universal_newlines=True
        )
    except subprocess.CalledProcessError as err:
        print("CalledProcessError: {}".format(err))
        return np.inf

    search_str = "Wrote trial information"
    for line in output.splitlines():
        print("search output: " + line)
        if line.startswith(search_str):
            print("found line: " + line)
            # The path of the trial file is the last token on the line.
            fname = line.split()[-1]
            with open(fname) as f:
                hist = json.load(f)
            fom = hist["history"]["0"]["val_loss"][-1]
            print("fom: " + str(fom))
            return float(fom)

    # The job finished but never reported a trial file.
    return np.inf
def train_model(params, n_nodes=1, n_epochs=3):
    """Train the test CNN through ``mpiLAPI`` and return its final validation loss.

    The training output is scanned for a line starting with
    "Wrote trial information". The last whitespace-separated token of that
    line is opened as a JSON file and the last entry of
    ``history["0"]["val_loss"]`` is returned.

    Args:
        params: Indexable hyperparameters: ``params[0]`` is the dropout,
            ``params[1]`` the kernel size and ``params[2]`` the base-10
            exponent of the learning rate.
        n_nodes: Forwarded to ``mlapi.train`` as ``N``.
        n_epochs: Forwarded to ``mlapi.train`` as ``epoch``.

    Returns:
        The final validation loss as a float, or ``np.inf`` when training
        raises ``subprocess.CalledProcessError`` or no result line is found,
        so the caller always receives a number.
    """
    # Alternative model, matching the densenet search space:
    # model = mpiLAPI.test_densenet(depth=3*params[0]+4, growth_rate=params[1], dropout_rate=params[2], nb_filter=params[3], lr=10.0**np.int32(params[4]))
    model = mpiLAPI.test_cnn(
        dropout=params[0],
        kernel_size=params[1],
        lr=10.0 ** np.int32(params[2]),
    )
    mlapi = mpiLAPI.mpi_learn_api(
        model=model,
        train_pattern='/bigdata/shared/LCDJets_Remake/train/04*.h5',
        val_pattern='/bigdata/shared/LCDJets_Remake/val/020*.h5',
        check_file=True,
    )

    try:
        output = mlapi.train(
            N=n_nodes,
            trial_name='test',
            features_name='Images',
            labels_name='Labels',
            batch=2,
            epoch=n_epochs,
            verbose=True,
            loss='categorical_crossentropy',
            easgd=False,
            early_stopping=5,
        )
    except subprocess.CalledProcessError as err:
        print("CalledProcessError: {}".format(err))
        return np.inf

    # NOTE(review): the return type of mlapi.train is not visible from this
    # file; decode defensively in case it hands back raw subprocess bytes.
    if isinstance(output, bytes):
        output = output.decode()

    search_str = "Wrote trial information"
    for line in output.splitlines():
        print("search output: " + line)
        if line.startswith(search_str):
            print("found line: " + line)
            # The path of the trial file is the last token on the line.
            fname = line.split()[-1]
            with open(fname) as f:
                hist = json.load(f)
            fom = hist["history"]["0"]["val_loss"][-1]
            print("fom: " + str(fom))
            return float(fom)

    # Training returned but never reported a trial file: score the trial as
    # failed instead of returning None.
    return np.inf
if __name__ == '__main__':
    # Expose only GPUs 4 and 5 to this process and the jobs it launches.
    os.environ["CUDA_VISIBLE_DEVICES"] = "4,5"

    n_trials = 4          # number of sequential ask/train/tell rounds
    concurrent_train = 5  # not read anywhere below

    # Search space consumed by train_model (one range per params index).
    param_grid_cnn = [
        (.0, .85),  # dropout_rate
        (3, 10),    # kernel_size
        (-5, 1),    # lr (used as a base-10 exponent by train_model)
    ]
    # Search space for the densenet variant; not used below.
    param_grid_densenet = [
        (3, 8),      # depth
        (1, 5),      # growth_rate
        (.0, .85),   # dropout_rate
        (32, 256),   # nb_filters
        (-5, 1),     # lr
    ]

    # https://scikit-optimize.github.io/#skopt.Optimizer
    bayesian_opt = Optimizer(param_grid_cnn)

    # TODO: how many do you want to spawn at the start?
    for trial in range(n_trials):
        candidate = bayesian_opt.ask()
        print(candidate)
        score = train_model(candidate)
        print(score)
        # score = dummy_model(candidate)
        bayesian_opt.tell(candidate, score)
        print('iteration:', trial, candidate, score)