From 66e33cb59eb5e10db4bd275f96e6267584e7f6a8 Mon Sep 17 00:00:00 2001
From: Ilya Matiach
Date: Fri, 26 Jan 2024 17:00:49 -0500
Subject: [PATCH] fix builds by supporting sparse_output renamed parameter in
 OneHotEncoder in new scikit-learn version update (#2507)

---
 .github/workflows/CI-python.yml                       |  6 ++++++
 ...ranalysis-interpretability-dashboard-census.ipynb  | 10 +++++++++-
 ...oard-housing-classification-model-debugging.ipynb  | 12 ++++++++++--
 ...ponsibleaidashboard-housing-decision-making.ipynb  | 12 ++++++++++--
 .../models/sklearn/sklearn_model_utils.py             | 11 +++++++++--
 5 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/CI-python.yml b/.github/workflows/CI-python.yml
index fcaae683e0..b9d9485a9d 100644
--- a/.github/workflows/CI-python.yml
+++ b/.github/workflows/CI-python.yml
@@ -58,6 +58,12 @@ jobs:
           pip install -v -e .
         working-directory: ${{ matrix.packageDirectory }}
 
+      - if: ${{ (matrix.packageDirectory == 'erroranalysis') || (matrix.packageDirectory == 'responsibleai') }}
+        name: Install rai_test_utils locally until next version is released
+        run: |
+          pip install -v -e .
+        working-directory: rai_test_utils
+
       - name: Pip freeze
         run: |
           pip freeze > installed-requirements-dev.txt
diff --git a/notebooks/individual-dashboards/erroranalysis-dashboard/erroranalysis-interpretability-dashboard-census.ipynb b/notebooks/individual-dashboards/erroranalysis-dashboard/erroranalysis-interpretability-dashboard-census.ipynb
index d8ad57e2f0..d5ba5c57b4 100644
--- a/notebooks/individual-dashboards/erroranalysis-dashboard/erroranalysis-interpretability-dashboard-census.ipynb
+++ b/notebooks/individual-dashboards/erroranalysis-dashboard/erroranalysis-interpretability-dashboard-census.ipynb
@@ -118,11 +118,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from packaging import version\n",
+    "import sklearn\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.impute import SimpleImputer\n",
     "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
     "from sklearn.compose import ColumnTransformer\n",
     "\n",
+    "# for older scikit-learn versions use sparse, for newer sparse_output:\n",
+    "if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
+    "    ohe_params = {\"sparse\": False}\n",
+    "else:\n",
+    "    ohe_params = {\"sparse_output\": False}\n",
+    "\n",
     "def split_label(dataset):\n",
     "    X = dataset.drop(['income'], axis=1)\n",
     "    y = dataset[['income']]\n",
@@ -141,7 +149,7 @@
     "    ])\n",
     "    cat_pipe = Pipeline([\n",
     "        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
-    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
+    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
     "    ])\n",
     "    feat_pipe = ColumnTransformer([\n",
     "        ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
diff --git a/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-classification-model-debugging.ipynb b/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-classification-model-debugging.ipynb
index 490b33a119..ed1c234e2d 100644
--- a/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-classification-model-debugging.ipynb
+++ b/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-classification-model-debugging.ipynb
@@ -70,12 +70,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from packaging import version\n",
     "from raiutils.dataset import fetch_dataset\n",
+    "import sklearn\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.impute import SimpleImputer\n",
     "from sklearn.preprocessing import OneHotEncoder\n",
     "from sklearn.compose import ColumnTransformer\n",
     "\n",
+    "# for older scikit-learn versions use sparse, for newer sparse_output:\n",
+    "if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
+    "    ohe_params = {\"sparse\": False}\n",
+    "else:\n",
+    "    ohe_params = {\"sparse_output\": False}\n",
+    "\n",
     "def split_label(dataset, target_feature):\n",
     "    X = dataset.drop([target_feature], axis=1)\n",
     "    y = dataset[[target_feature]]\n",
@@ -93,7 +101,7 @@
     "    ])\n",
     "    cat_pipe = Pipeline([\n",
     "        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
-    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
+    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
     "    ])\n",
     "    feat_pipe = ColumnTransformer([\n",
     "        ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
@@ -179,7 +187,7 @@
    "source": [
     "To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.\n",
     "\n",
-    "RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.",
+    "RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.\n",
     "\n",
     "You may also create the `FeatureMetadata` container, identify any feature of your choice as the `identity_feature`, specify a list of strings of categorical feature names via the `categorical_features` parameter, and specify dropped features via the `dropped_features` parameter. The `FeatureMetadata` may also be passed into the `RAIInsights`."
    ]
diff --git a/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-decision-making.ipynb b/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-decision-making.ipynb
index 333e9a2b29..7beafba689 100644
--- a/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-decision-making.ipynb
+++ b/notebooks/responsibleaidashboard/tabular/responsibleaidashboard-housing-decision-making.ipynb
@@ -59,12 +59,20 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "from packaging import version\n",
     "from raiutils.dataset import fetch_dataset\n",
+    "import sklearn\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.impute import SimpleImputer\n",
     "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
     "from sklearn.compose import ColumnTransformer\n",
     "\n",
+    "# for older scikit-learn versions use sparse, for newer sparse_output:\n",
+    "if version.parse(sklearn.__version__) < version.parse('1.2'):\n",
+    "    ohe_params = {\"sparse\": False}\n",
+    "else:\n",
+    "    ohe_params = {\"sparse_output\": False}\n",
+    "\n",
     "def split_label(dataset, target_feature):\n",
     "    X = dataset.drop([target_feature], axis=1)\n",
     "    y = dataset[[target_feature]]\n",
@@ -83,7 +91,7 @@
     "    ])\n",
     "    cat_pipe = Pipeline([\n",
     "        ('cat_imputer', SimpleImputer(strategy='constant', fill_value='?')),\n",
-    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', sparse=False))\n",
+    "        ('cat_encoder', OneHotEncoder(handle_unknown='ignore', **ohe_params))\n",
     "    ])\n",
     "    feat_pipe = ColumnTransformer([\n",
     "        ('num_pipe', num_pipe, pipe_cfg['num_cols']),\n",
@@ -148,7 +156,7 @@
    "source": [
     "To use Responsible AI Dashboard, initialize a RAIInsights object upon which different components can be loaded.\n",
     "\n",
-    "RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.",
+    "RAIInsights accepts the model, the full dataset, the test dataset, the target feature string and the task type string as its arguments.\n",
     "\n",
     "You may also create the `FeatureMetadata` container, identify any feature of your choice as the `identity_feature`, specify a list of strings of categorical feature names via the `categorical_features` parameter, and specify dropped features via the `dropped_features` parameter. The `FeatureMetadata` may also be passed into the `RAIInsights`."
    ]
diff --git a/rai_test_utils/rai_test_utils/models/sklearn/sklearn_model_utils.py b/rai_test_utils/rai_test_utils/models/sklearn/sklearn_model_utils.py
index 9866fad009..4721d4ce01 100644
--- a/rai_test_utils/rai_test_utils/models/sklearn/sklearn_model_utils.py
+++ b/rai_test_utils/rai_test_utils/models/sklearn/sklearn_model_utils.py
@@ -3,6 +3,8 @@
 
 import numpy as np
 import pandas as pd
+import sklearn
+from packaging import version
 from sklearn import svm
 from sklearn.compose import ColumnTransformer
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
@@ -127,6 +129,11 @@ def conv(X):
         (conv(np.prod(x, axis=1)).reshape(-1, 1),
          conv(np.prod(x, axis=1)**2).reshape(-1, 1))
     ))
+    # for older scikit-learn versions use sparse, for newer sparse_output:
+    if version.parse(sklearn.__version__) < version.parse('1.2'):
+        ohe_params = {"sparse": False}
+    else:
+        ohe_params = {"sparse_output": False}
     transformations = ColumnTransformer([
         ("age_fare_1", Pipeline(steps=[
             ('imputer', SimpleImputer(strategy='median')),
@@ -137,8 +144,8 @@ def conv(X):
         ("embarked", Pipeline(steps=[
             ("imputer", SimpleImputer(strategy='constant',
                                       fill_value='missing')),
-            ("encoder", OneHotEncoder(sparse=False))]), ["embarked"]),
-        ("sex_pclass", OneHotEncoder(sparse=False), ["sex", "pclass"])
+            ("encoder", OneHotEncoder(**ohe_params))]), ["embarked"]),
+        ("sex_pclass", OneHotEncoder(**ohe_params), ["sex", "pclass"])
     ])
     clf = Pipeline(steps=[('preprocessor', transformations),
                           ('classifier',