-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmodel.py
52 lines (44 loc) · 2.15 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import os
import json
import pandas as pd
from matplotlib import pyplot as plt
from catboost import CatBoostClassifier
class CatBoostModel:
    """Thin wrapper around ``CatBoostClassifier`` that also persists feature metadata.

    The wrapper builds the classifier from a parsed-arguments object, trains it,
    writes the feature list / feature importances into ``output_dir`` and exposes
    a probability-returning ``inference`` helper.
    """

    def __init__(self, args, output_dir, task_type="GPU", devices="0"):
        """Build the underlying classifier.

        Args:
            args: Object exposing ``iteration``, ``seed``, ``early_stopping`` and
                ``lr`` attributes (e.g. an ``argparse.Namespace``).
            output_dir: Directory used as CatBoost's ``train_dir`` and as the
                destination of the files written by ``save_features``.
            task_type: Forwarded to ``CatBoostClassifier``. Defaults to ``"GPU"``,
                the value that used to be hard-coded.
            devices: Forwarded to ``CatBoostClassifier``. Defaults to ``"0"``,
                the value that used to be hard-coded.
        """
        self.output_dir = output_dir
        self.model = CatBoostClassifier(
            iterations=args.iteration,
            random_seed=args.seed,
            custom_metric=["AUC", "Accuracy"],
            eval_metric="AUC",
            early_stopping_rounds=args.early_stopping,
            train_dir=self.output_dir,
            learning_rate=args.lr,
            task_type=task_type,
            devices=devices,
        )

    def fit(self, X, y, cat_features, eval_set, verbose=100):
        """Train the wrapped classifier.

        All arguments are forwarded unchanged to ``CatBoostClassifier.fit``.
        """
        self.model.fit(
            X,
            y,
            cat_features=cat_features,
            eval_set=eval_set,
            verbose=verbose,
        )

    def save_features(self, features, cat_features, feature_descript):
        """Write the feature list and the feature importances to ``output_dir``.

        Three files are produced:

        * ``features.json`` - feature count, description, feature names and
          categorical feature names.
        * ``feature_importances.csv`` - importances sorted in descending order.
        * ``feature_importances.png`` - horizontal bar chart of the same data.

        Args:
            features: Feature names; must be JSON-serializable and support ``len``.
            cat_features: Categorical feature names; must be JSON-serializable.
            feature_descript: Free-form description stored under ``"description"``.
        """
        # Make sure the destination exists even if nothing has created it yet.
        os.makedirs(self.output_dir, exist_ok=True)

        # feature names
        feature_dict = {
            "num_feats": len(features),
            "description": feature_descript,
            "FEAT": features,
            "CAT_FEAT": cat_features,
        }
        features_path = os.path.join(self.output_dir, "features.json")
        with open(features_path, "w", encoding="utf-8") as f:
            json.dump(feature_dict, f)

        # save feature importance (both text & image)
        importance_df = (
            pd.DataFrame({
                "feature_name": self.model.feature_names_,
                "importances": self.model.feature_importances_,
            })
            .sort_values("importances", ascending=False)
            .reset_index(drop=True)
        )
        # The file name used to be misspelled ("featrue_importances.csv"); it now
        # matches the PNG written below.
        importance_df.to_csv(
            os.path.join(self.output_dir, "feature_importances.csv")
        )

        ax = importance_df.plot.barh(
            x="feature_name", y="importances", figsize=(15, 20)
        )
        # Rows are sorted descending, so flip the axis to put the largest on top.
        ax.invert_yaxis()
        fig = ax.get_figure()
        try:
            # Save through the figure itself rather than pyplot's "current
            # figure", which may not be the one created above.
            fig.savefig(os.path.join(self.output_dir, "feature_importances.png"))
        finally:
            # Release the figure; otherwise every call leaves one open in pyplot.
            plt.close(fig)

    def inference(self, X_test):
        """Return column 1 of ``predict_proba`` for ``X_test``.

        NOTE(review): for a binary classifier this is presumably the
        positive-class probability - confirm against the label encoding.
        """
        return self.model.predict_proba(X_test)[:, 1]