# hyperparameter-search.py
# Forked from yxtay/char-rnn-text-generation.
import os
import sys
import time
import pprint
import pickle
import csv
import utils
import train
import numpy as np
from hyperopt import fmin, tpe, rand, hp, STATUS_OK, STATUS_FAIL
# How many separate models the search trains, one per hyperparameter sample.
NUM_TRIALS = 40

# Upper bound on the number of epochs a single trial may train for.
MAX_EPOCHS_PER_TRIAL = 10

TRAIN_TEXT_PATH = os.path.join('data', 'tweets-split', 'train-80k.txt')
VAL_TEXT_PATH = os.path.join('data', 'tweets-split', 'validate-20k.txt')

# Root directory of this search. Each trial is saved in its own sub-folder
# named after its trial number (e.g. 1/, 2/, 3/, 4/, etc.).
EXPERIMENT_PATH = os.path.join(
    'checkpoints',
    '{}-trials-{}-epochs'.format(NUM_TRIALS, MAX_EPOCHS_PER_TRIAL),
)

# Every trial draws one configuration from this search space and trains a new
# model with it. Plain constants are held fixed across trials; swap any of
# them for hp.choice(), hp.uniform(), etc. to include them in the search.
# See hyperopt's documentation for the other available sampling expressions.
SEARCH_SPACE = {
    'batch_size': hp.choice('batch_size', [16, 32, 64, 128, 256, 512]),
    'drop_rate': 0.0,
    'embedding_size': hp.choice('embedding_size', [16, 32, 64, 128, 256]),
    'num_layers': 1,
    'rnn_size': 512,
    'seq_len': hp.choice('seq_len', [16, 32, 64, 128, 256]),
    'optimizer': hp.choice(
        'optimizer',
        ['rmsprop', 'adagrad', 'adadelta', 'adam'],
    ),
    'clip_norm': hp.choice('clip_norm', [0.0, 5.0]),
}

# The search algorithm defaults to "Tree of Parzen Estimators".
# To use "Random Search" instead, set:
#   SEARCH_ALGORITHM = rand.suggest
SEARCH_ALGORITHM = tpe.suggest
def main():
    """Run the hyperparameter search defined by the module-level constants.

    Creates EXPERIMENT_PATH (exiting with status 1 if it already exists, so
    that a previous search is never overwritten), then lets hyperopt's
    fmin() call the nested trial() function NUM_TRIALS times. After every
    trial the accumulated results are written to disk through
    save_hp_checkpoint().
    """
    # Search state shared with trial() below.
    trial_num = 1
    trials = []

    def trial(params):
        """Train one model for a sampled configuration and report the result.

        Called by hyperopt's fmin() with one sample from SEARCH_SPACE.

        Args:
            params: dict of sampled hyperparameters. It is extended in place
                with 'num_epochs' and 'checkpoint_dir' before training.

        Returns:
            dict with the keys 'loss' (the validation loss, which hyperopt
            minimizes), 'status', 'train_loss', 'num_epochs', 'train_time',
            'trial_num' and 'error'.
        """
        # Only trial_num is rebound here; `trials` is mutated in place and
        # the module-level constants are only read, so neither needs a
        # nonlocal/global declaration.
        nonlocal trial_num

        params['num_epochs'] = MAX_EPOCHS_PER_TRIAL
        params['checkpoint_dir'] = os.path.join(EXPERIMENT_PATH,
                                                str(trial_num))
        os.makedirs(params['checkpoint_dir'])

        # time the model training and print the hyperparameter sample to
        # the console.
        then = time.time()
        pprint.pprint(params)

        status = STATUS_OK
        error = None

        # Placeholder values reported when training raises. The losses are
        # deliberately large so that a failed trial sorts after successful
        # ones when trials are ranked by loss.
        num_epochs = 0
        val_loss = 100
        loss = 100

        # train the model, recording any error as a failed trial instead of
        # aborting the whole search
        try:
            _, loss, val_loss, num_epochs = train.train(params,
                                                        TRAIN_TEXT_PATH,
                                                        VAL_TEXT_PATH)
        except Exception as err:
            status = STATUS_FAIL
            # NOTE(review): the exception object itself is stored and later
            # pickled via save_hp_checkpoint(); presumably it is always
            # picklable -- confirm.
            error = err
            print(err)

        results = {
            # use val_loss as the metric hyperopt will attempt to minimize
            'loss': val_loss,
            'status': status,
            'train_loss': loss,
            'num_epochs': num_epochs,
            'train_time': time.time() - then,
            'trial_num': trial_num,
            'error': error
        }

        # keep this trial with the others and checkpoint all of them to
        # disk after every trial
        trials.append([params, results])
        save_hp_checkpoint(EXPERIMENT_PATH, trials)

        trial_num += 1
        return results

    print("corpus length: {}".format(os.path.getsize(TRAIN_TEXT_PATH)))
    print('vocabsize: ', utils.VOCAB_SIZE)

    # We don't want to accidentally overwrite a past search, so exit if
    # EXPERIMENT_PATH already exists. Attempting the creation and handling
    # the failure avoids a check-then-create race and also covers the case
    # where the path exists but is not a directory.
    try:
        os.makedirs(EXPERIMENT_PATH)
    except FileExistsError:
        print('EXPERIMENT_PATH {} already exists, exiting.'.format(EXPERIMENT_PATH))
        # sys.exit() rather than the bare exit() helper, which is installed
        # by the site module for interactive use.
        sys.exit(1)

    # use hyperopt's fmin() to sample from the hyperparameter space and run
    # our trials. It will search for a hyperparameter configuration that
    # minimizes our val_loss.
    fmin(fn=trial,
         space=SEARCH_SPACE,
         algo=SEARCH_ALGORITHM,
         max_evals=NUM_TRIALS)
def save_hp_checkpoint(experiment_path, trials):
    """Persist every trial recorded so far inside ``experiment_path``.

    Two files are (re)written: ``trials.pickle`` holds the trials in the
    order they were given, and ``trials.csv`` holds them ranked by loss,
    ascending.
    """
    pickle_path = os.path.join(experiment_path, 'trials.pickle')
    csv_path = os.path.join(experiment_path, 'trials.csv')

    save_trials(pickle_path, trials)
    save_trials_as_csv(csv_path, rank_trials(trials))
def rank_trials(trials):
    """Return the trials ordered by loss, ascending.

    Args:
        trials: sequence of ``(params, result)`` pairs where ``result`` is a
            dict with a ``'loss'`` entry.

    Returns:
        A new list holding the same pair objects, lowest loss first. Trials
        with equal loss keep their original relative order.
    """
    losses = [result['loss'] for _, result in trials]
    # A stable sort keeps tied trials (e.g. several failed trials that all
    # report the same placeholder loss) in the order they were run, so the
    # ranking is deterministic.
    order = np.argsort(losses, kind='stable')
    return [trials[index] for index in order]
def save_trials_as_csv(filename, ranked_trials):
    """Write already-ranked trials to ``filename`` as CSV, one row per trial.

    Args:
        filename: path of the CSV file to create or overwrite.
        ranked_trials: iterable of ``(params, results)`` pairs in the order
            they should be ranked; the first pair gets rank 1.
    """
    param_fields = ['batch_size', 'drop_rate', 'embedding_size',
                    'num_layers', 'rnn_size', 'seq_len', 'optimizer',
                    'clip_norm']
    fieldnames = ['rank', 'trial_num', 'val_loss', 'train_loss',
                  'num_epochs', 'avg_epoch_seconds'] + param_fields + ['status']

    # newline='' lets the csv module control line endings itself; without it
    # an extra '\r' is emitted on platforms that translate '\n' on write.
    with open(filename, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        for rank, (trial, results) in enumerate(ranked_trials, start=1):
            # The epsilon floor avoids a ZeroDivisionError when a trial
            # completed no epochs; the value written in that case is very
            # large and not a meaningful average.
            epochs = max(results['num_epochs'], sys.float_info.epsilon)
            row = {
                'rank': rank,
                'trial_num': results['trial_num'],
                'val_loss': results['loss'],
                'train_loss': results['train_loss'],
                'num_epochs': results['num_epochs'],
                'avg_epoch_seconds': int(results['train_time'] / epochs),
                'status': results['status'],
            }
            for name in param_fields:
                row[name] = trial[name]
            writer.writerow(row)
def save_trials(filename, trials):
    """Serialize ``trials`` with pickle into ``filename``, overwriting it."""
    with open(filename, 'wb') as handle:
        pickle.Pickler(handle).dump(trials)
def load_trials(filename):
    """Read back and return the object pickled in ``filename``.

    NOTE: unpickling can execute arbitrary code, so only load files that
    come from a trusted source (such as ones written by save_trials()).
    """
    with open(filename, 'rb') as handle:
        return pickle.Unpickler(handle).load()
# Run the search only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()