# model_evaluation.py
import argparse
import os
import sys

import numpy as np
import pandas as pd
from tensorflow.keras.models import load_model

from functions.tf_loss_custom import compute_loss_mae, compute_loss_raw, eval_loss_raw
from functions.sub_visualization import mortality_heatmap_grid, plot_implied_survival_curve, mortality_heatmap_differences, heatmap_check_homogeneity, plot_economic_evaluation
from functions.sub_backtesting import check_exploded_gradients
from functions.sub_actuarial import neural_premium_zillmerisation
from global_vars import T_MAX, AGE_RANGE, INIT_AGE_RANGE, ALPHA, BETA, GAMMA
from global_vars import path_data, path_dav, path_hyperopt_male, path_hyperopt_female
from global_vars import path_models_resnet_hpsearch_male, path_models_resnet_hpsearch_female, path_models_resnet_hpsearch_none


def run_visual_eval(baseline_sex='female', tuning_type='manual', path_tag=''):
    '''
    Run the analysis of results for a given DAV baseline, including
        1) the implied survival curve,
        2) heatmaps: fitted probabilities vs. the DAV baseline,
        3) heatmaps: differences of fitted probabilities between gender and smoker-status combinations.

    Inputs:
    -------
        baseline_sex:   string, either "female" or "male"
        tuning_type:    string, either "manual" or "auto"
        path_tag:       run the experiment for paths of the form [path_model]+path_tag, i.e. explore other
                        experiments without changing path_model manually
    '''
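    # Illustrative call (the path_tag below is one of the tags used in __main__; any tag must match an
    # existing model directory):
    #   run_visual_eval(baseline_sex='female', tuning_type='manual', path_tag='_50_50_50_50_50')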
    # Optional: uncomment upon demand
    # speed-up by setting mixed precision -> disabled for now, as it causes dtype issues in compute_loss,
    # specifically when concatenating ones and cum_prob
    # policy = tf.keras.mixed_precision.Policy('mixed_float16')
    # tf.keras.mixed_precision.set_global_policy(policy)
    # Alternative: for numeric stability, set the default floating-point dtype to float64
    # tf.keras.backend.set_floatx('float64')

    assert baseline_sex in ['male', 'female']
    assert tuning_type in ['manual', 'auto']

    if baseline_sex == 'male':
        if tuning_type == 'manual':
            path_model = path_models_resnet_hpsearch_male + path_tag
        else:
            path_model = path_hyperopt_male
        sex2 = 'female'
    elif baseline_sex == 'female':
        if tuning_type == 'manual':
            path_model = path_models_resnet_hpsearch_female + path_tag
        else:
            path_model = path_hyperopt_female
        sex2 = 'male'
    else:
        if tuning_type == 'manual':
            path_model = path_models_resnet_hpsearch_none + path_tag
        else:
            #! Not implemented
            raise ValueError('An automated HPSearch without the baseline model has not been implemented yet.')
            # path_model = path_hyperopt_none
        sex2 = None

    # look at one individual model and visualize progress
    try:
        pmodel = load_model(os.path.join(path_model, r'model_best.h5'), compile=False)
    except Exception:
        print('"model_best.h5" seems not to exist. It may have to be created manually first by copying and renaming the model of choice found during HP tuning.')
        raise ValueError('Loading model_best.h5 failed. Path reference: ' + str(os.path.join(path_model, r'model_best.h5')))
    pmodel.compile(loss=compute_loss_mae, metrics=['mae'], optimizer='adam')  # we do not train anymore, so the specifics of the optimizer are irrelevant

    p_survive = pd.read_csv(os.path.join(path_dav, r'DAV2008T{}.csv'.format(baseline_sex)), delimiter=';', header=None).loc[:, 0].values.reshape((-1, 1))
    p_survive2 = pd.read_csv(os.path.join(path_dav, r'DAV2008T{}.csv'.format(sex2)), delimiter=';', header=None).loc[:, 0].values.reshape((-1, 1))
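    # Note (assumption from the read pattern above): each DAV2008T*.csv is expected to be semicolon-delimited
    # with no header and to hold one probability per age in its first column.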

    bool_grad = check_exploded_gradients(pmodel)
    if bool_grad:
        raise ValueError('NaN parameter values in model!')

    # where did the training alter the underlying mortality table?
    _ = plot_implied_survival_curve(pmodel, dav_table=p_survive, dav_table2=p_survive2,
                                    path_save=path_model, age_max=T_MAX,
                                    baseline_tag=baseline_sex, age_range=AGE_RANGE)

    # plot heatmaps of the fitted probabilities vs. the DAV baseline
    val_dict, _ = mortality_heatmap_grid(pmodel, p_survive, baseline_tag=baseline_sex, m=1, age_max=T_MAX, rnn_seq_len=20, save_path=path_model, age_range=INIT_AGE_RANGE)
    heatmap_check_homogeneity(val_dict, baseline_tag=baseline_sex, save_path=path_model, age_range=INIT_AGE_RANGE)
    mortality_heatmap_differences(val_dict, baseline_tag=baseline_sex, save_path=path_model, age_range=INIT_AGE_RANGE)
    print('heatmaps created and stored successfully in', path_model)


def run_econom_eval(baseline_sex='male', tuning_type='manual', path_tag='', kfolds=1):
    '''
    Intrinsic backtesting. Use the transition probabilities of the calibrated neural network 'model_best' to compute premium values.
    Results are displayed as absolute and relative errors, arranged with respect to selected policy features, such as the initial age.

    Inputs:
    -------
        baseline_sex:   string, either "female", "male" or "none"
        tuning_type:    string, either "manual" or "auto"
        path_tag:       run the experiment for paths of the form [path_model]+path_tag, i.e. explore other
                        experiments without changing path_model manually
        kfolds:         int, 1 (no cross-validation) or 2 (2-fold cross-validation); selects the '_cv_k'-tagged
                        models and data slices
    '''
    assert baseline_sex in ['male', 'female', 'none']
    assert tuning_type in ['manual', 'auto']
    assert kfolds in (1, 2), 'only no cross-validation (1) or 2-fold cv (2) supported.'

    for k in range(kfolds):
        # tag indicating which slice the model was trained on, e.g. [model_name]_cv_0[.h5] and [x_train]_cv_0[.npy]
        cv_train_tag = f'_cv_{k}' if kfolds > 1 else ''
        # validation tag: select the hold-out data for validation, e.g. [x_test]_cv_1[.npy]
        cv_validation_tag = f'_cv_{(k+1)%2}' if kfolds > 1 else ''
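        # worked example: with kfolds=2 and k=0, the model tagged '_cv_0' is evaluated on the hold-out slice '_cv_1';
        # with kfolds=1 both tags are empty and the untagged files/models are used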

        if baseline_sex == 'male':
            if tuning_type == 'manual':
                path_model = path_models_resnet_hpsearch_male + path_tag
            else:
                path_model = path_hyperopt_male
        elif baseline_sex == 'female':
            if tuning_type == 'manual':
                path_model = path_models_resnet_hpsearch_female + path_tag
            else:
                path_model = path_hyperopt_female
        else:
            if tuning_type == 'manual':
                path_model = path_models_resnet_hpsearch_none + path_tag
            else:
                #! Not implemented
                raise ValueError('An automated HPSearch without the baseline model has not been implemented yet.')
                # Note: tests were run but did not encourage a more detailed analysis due to poor performance
                # path_model = path_hyperopt_none

        #### load test data, i.e. with premium values set to zero and cash flows w/o premium-related payments
        try:
            with open(os.path.join(path_data, f'x_test_raw{cv_validation_tag}.npy'), 'rb') as f:
                x_test_raw = np.load(f, allow_pickle=True)
                # print('\t .. x_test_raw loaded. ', type(x_test_raw), ' of shape ', x_test_raw.shape)
            with open(os.path.join(path_data, f'x_test{cv_validation_tag}.npy'), 'rb') as f:
                x_test = np.load(f, allow_pickle=True)  # .astype(np.float64)
                # print('\t .. x_test loaded.', type(x_test), ' of shape ', x_test.shape)
            # load cash-flow values (test data w/o premium-related payments)
            with open(os.path.join(path_data, f'y_test{cv_validation_tag}.npy'), 'rb') as f:
                y_test = np.load(f, allow_pickle=True)  # .astype(np.float64)
                # print('\t .. y_test loaded.', type(y_test), ' of shape ', y_test.shape)
            # # backtesting: check consistency of training and test data
            # with open(os.path.join(path_data, f'x_train{cv_validation_tag}.npy'), 'rb') as f:
            #     x_train = np.load(f, allow_pickle=True)
            #     # print('\t .. x_train loaded for backtesting.')
            # load x-values from the hold-out set
            with open(os.path.join(path_data, f'x_train_raw{cv_validation_tag}.npy'), 'rb') as f:
                contract_features_raw = np.load(f, allow_pickle=True)
                # print('\t .. contract_features_raw loaded. Premium values will be used for the economic evaluation.')
        except Exception as error:
            print('--------------')
            print(f'Error {error} while loading files. \n Make sure all files are available, e.g. by running create_data.py.')
            print('--------------')
            return

        # sanity check for loaded data
        # check_test_data(data_train=x_train, data_test=x_test)

        # select contract features for the res-net
        # recall format: x[['x', 'n', 't', 'ZahlweiseNum', 'Beginnjahr', 'Beginnmonat', 'GeschlechtNum', 'RauchertypNum', 'Leistung', 'tba']]
        res_features = [0, 3, 6, 7]
        base_features = [0, 3]
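        # per the column layout above, the res-net thus receives age 'x', payment frequency 'ZahlweiseNum',
        # gender 'GeschlechtNum' and smoker status 'RauchertypNum'; the baseline input receives age and
        # payment frequency only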

        # look at one individual model and visualize progress
        path_local = os.path.join(path_model, f'model_best{cv_train_tag}.h5')
        print(f'Loading {path_local} ...')
        pmodel = load_model(path_local, compile=False)
        pmodel.compile(loss=compute_loss_mae, metrics=[compute_loss_raw], optimizer='adam')  # we do not train anymore, so the specifics of the optimizer are irrelevant

        # Markov transition probabilities of the calibrated model
        if baseline_sex != 'none':
            y_pred = pmodel.predict([x_test[:, :, base_features], x_test[:, :, res_features]])
        else:
            y_pred = pmodel.predict(x_test[:, :, res_features])
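        # (with a DAV baseline the model takes two inputs, baseline features plus residual features;
        #  without a baseline only the residual features are fed in)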

        # lump sums for net premiums P_0
        # Note: the cash flows y_test are of the form APV_Premium - APV_Sum - APV_Cost with the premium-related payments set to 0;
        # hence, we need a negative sign to obtain the P_0 quantity
        P_0 = -eval_loss_raw(y_true=y_test, y_pred=y_pred).numpy().reshape((-1, 1))

        # premium duration = t/ZahlweiseNum (e.g. 3/(1/12) = 36 [iterations])
        # Note: we need to use raw, i.e. non-scaled, values here
        premium_duration = x_test_raw[:, 0, 2].reshape((-1, 1)) / x_test_raw[:, 0, 3].reshape((-1, 1))
        P_true = contract_features_raw[:, 0, -1].reshape((-1, 1))

        zill_factor = neural_premium_zillmerisation(y=y_pred, freq=x_test[:, 0, 3], v=GAMMA,
                                                    t_iter=premium_duration, alpha=ALPHA, beta=BETA)  # model = pmodel,
        P_pred = P_0 / zill_factor
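        # interpretation (sketch): dividing the lump-sum value P_0 by the zillmerisation factor yields the
        # predicted periodic premium P_pred, which is compared against the true premium P_true below; see
        # neural_premium_zillmerisation for the exact definition of the factor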

        q = 0.995
        # recall format: x[['x', 'n', 't', 'ZahlweiseNum', 'Beginnjahr', 'Beginnmonat', 'GeschlechtNum', 'RauchertypNum', 'Leistung', 'tba']]
        name_lst, index_lst = [r'initial age $a_0$', r'premium value $P$', r'sum insured $S$', r'premium duration $t$', r'duration $n$', r'payment style $m$'], [0, -1, -2, 2, 1, 3]
        for e in ['absolute', 'relative']:
            plot_economic_evaluation(val_true=P_true, val_pred=P_pred, path_save=path_model,
                                     error_type=e, baseline_tag=baseline_sex,
                                     features_data=contract_features_raw,
                                     features_id_lst=index_lst, features_str_lst=name_lst, q=q,
                                     kfold_tag=cv_train_tag)

        # create a table with statistics of the relative error
        e_rel = (P_true - P_pred) / P_true
        alphas = [0, 0.005, 0.1, 0.25, 0.5, 0.75, 0.9, 0.995, 1]
        quantiles = np.quantile(e_rel, alphas)
        stats = pd.DataFrame(data=None, columns=alphas)
        stats.loc[baseline_sex + r' $q_\alpha$ [%]'] = np.round(quantiles * 100, 2)
        stats_path = os.path.join(path_model, r'{}{}_error_rel_stats.tex'.format(baseline_sex, cv_train_tag))
        stats.to_latex(stats_path)
        print(f'Statistics for baseline sex {baseline_sex}, cross-validation fold {k+1} of {kfolds} (kfolds=1 means no cross-validation).')
        print(stats)
        print('stats computed and stored successfully at', stats_path)


if __name__ == '__main__':
    # ---------------------
    # load results of manual ('manual') or automated ('auto') HP tuning
    # optional: pass the mode as a user input when running the current file
    parser = argparse.ArgumentParser(
        description="Input args for hyperopt HPTuning"
    )
    parser.add_argument(
        "--mode",
        type=str,
        default='manual',
        help="Indicate if results from manual ('manual') or automated ('auto') HPTuning should be loaded.",
    )
    args = parser.parse_args()
    mode = args.mode

    import warnings
    import logging
    warnings.filterwarnings("ignore")
    logging.getLogger('matplotlib').setLevel(level=logging.CRITICAL)  # suppress postscript latency warnings when saving images in the .eps format

    for gender in ['female', 'male']:
        print('####################################################')
        # optional: uncomment to loop over more layer settings of the manual HP search
        for tag in [
            '_50_50_50_50_50'  # '_40_40_20', '_50_50_50', '_50_50_50_50', '_50_50_50_50_50', '_50_50_50_50_50_50'
        ]:
            # create all qualitative plots
            run_visual_eval(baseline_sex=gender, tuning_type=mode, path_tag=tag)
            print('\t layer widths: ' + tag)
            # perform economic backtesting
            run_econom_eval(baseline_sex=gender, tuning_type=mode, path_tag=tag)
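
# Example invocation (assuming the data from create_data.py and the tuned 'model_best' files are in place):
#   python model_evaluation.py --mode manual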