Skip to content

Commit

Permalink
fixed dendrogram + xgboost label encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
peach-lucien committed Jun 4, 2024
1 parent 8bfdf34 commit 83dec3b
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
5 changes: 4 additions & 1 deletion hcga/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import shap
from sklearn.metrics import accuracy_score, mean_absolute_error
from sklearn.model_selection import RepeatedKFold, RepeatedStratifiedKFold, ShuffleSplit
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tqdm import tqdm

from hcga.io import load_fitted_model, save_fitted_model
Expand Down Expand Up @@ -684,7 +684,10 @@ def classify_pairwise( # pylint: disable=too-many-locals

top_features = {}
for pair in tqdm(class_pairs):
L.info("Pairwise classification between classes %s and %s", str(pair[0]), str(pair[1]))
features_pair = features.loc[(features.label == pair[0]) | (features.label == pair[1])]
le = LabelEncoder()
features_pair.label = le.fit_transform(features_pair.label)
analysis_results = fit_model_kfold(
features_pair,
classifier,
Expand Down
2 changes: 1 addition & 1 deletion hcga/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def _plot_dendrogram_shap(
ax2 = plt.subplot(gs[1, 0])

cor = np.abs(X_red.corr())
Z = linkage(cor.to_numpy(), "ward")
Z = linkage(np.nan_to_num(cor.to_numpy()), "ward")
dn = dendrogram(Z, labels=X_red.columns, ax=ax1)
ax1.xaxis.set_ticklabels([])
ax1.set_ylabel("Euclidean Distance")
Expand Down

0 comments on commit 83dec3b

Please sign in to comment.