Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix erroranalysis conversion exception with object detection task for complex pred Y and true Y data and numpy>=1.26.0 by specifying object type #2594

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
DEFAULT_MIN_CHILD_SAMPLES = 20
CACHED_SUBTREE_FEATURES = 'cache_subtree_features'
LEAF_VALUE = 'leaf_value'
OBJECT = 'object'
PREDICTION = 'prediction'
RAW_PREDICTION = 'rawPrediction'
PROBABILITY = 'probability'
Expand Down Expand Up @@ -341,10 +342,20 @@ def get_surrogate_booster_local(filtered_df, analyzer, is_model_analyzer,
diff = pred_y - true_y
if not isinstance(diff, np.ndarray):
diff = np.array(diff)

# Note: if direct conversion fails, for more complex data scenarios like
# object detection we need to convert to an object type for newer versions
# of numpy>=1.26.0
if not isinstance(pred_y, np.ndarray):
pred_y = np.array(pred_y)
try:
pred_y = np.array(pred_y)
except ValueError:
pred_y = np.array(pred_y, dtype=OBJECT)
if not isinstance(true_y, np.ndarray):
true_y = np.array(true_y)
try:
true_y = np.array(true_y)
except ValueError:
true_y = np.array(true_y, dtype=OBJECT)
if is_pandas:
input_data = input_data.to_numpy(copy=True)

Expand Down
3 changes: 2 additions & 1 deletion erroranalysis/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,5 @@ pytest-mock==3.6.1
requirements-parser==0.2.0
rai-test-utils[object_detection]>=0.4.2
scikit-learn<=1.5.1
interpret-core[required]<=0.3.2
interpret-core[required]<=0.3.2
ml-wrappers
108 changes: 106 additions & 2 deletions erroranalysis/tests/test_surrogate_error_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@
import pandas as pd
import pytest
from common_utils import replicate_dataset
from ml_wrappers import wrap_model

from erroranalysis._internal.cohort_filter import filter_from_cohort
from erroranalysis._internal.constants import (ARG, COLUMN, COMPOSITE_FILTERS,
DIFF, LEAF_INDEX, METHOD,
OPERATION, PRED_Y, ROW_INDEX,
SPLIT_FEATURE, SPLIT_INDEX,
TRUE_Y, CohortFilterMethods,
CohortFilterOps, Metrics,
ModelTask, regression_metrics)
CohortFilterOps, ImageColumns,
Metrics, ModelTask,
regression_metrics)
from erroranalysis._internal.error_analyzer import (ModelAnalyzer,
PredictionsAnalyzer)
from erroranalysis._internal.surrogate_error_tree import (
Expand All @@ -28,12 +30,22 @@
create_adult_census_data, create_binary_classification_dataset,
create_cancer_data, create_diabetes_data, create_iris_data,
create_simple_titanic_data)
from rai_test_utils.datasets.vision import (
get_images, load_fridge_object_detection_dataset)
from rai_test_utils.models.model_utils import create_models_classification
from rai_test_utils.models.sklearn import (
create_kneighbors_classifier, create_sklearn_random_forest_regressor,
create_titanic_pipeline)
from rai_test_utils.models.torch import get_object_detection_fridge_model
from raiutils.exceptions import UserConfigValidationException

try:
import torch # noqa: F401
import torchvision # noqa: F401
pytorch_installed = True
except ImportError:
pytorch_installed = False

SIZE = 'size'
PARENTID = 'parentId'
ERROR = 'error'
Expand Down Expand Up @@ -65,6 +77,71 @@ def predict(self, X):
return np.zeros((X.shape[0], 1))


class SimplifiedWrappedIndexPredictorModel:
    """Serve precomputed model outputs for rows selected by index.

    All predictions and probabilities are computed once, at construction
    time, for the images of the full dataset. Later calls to ``predict``
    and ``predict_proba`` only look up the cached outputs using the index
    of the data they are given. Simplified version of the wrapper in the
    responsibleai-vision package.
    """

    def __init__(self, model, dataset, image_mode):
        """Initialize the SimplifiedWrappedIndexPredictorModel.

        :param model: The model to wrap. It must expose ``predict`` and
            ``predict_proba``.
        :type model: object
        :param dataset: The dataset to use for making predictions.
        :type dataset: pandas.DataFrame
        :param image_mode: The mode to open the image in.
            See pillow documentation for all modes:
            https://pillow.readthedocs.io/en/stable/handbook/concepts.html
        :type image_mode: str
        """
        self.model = model
        self.dataset = dataset
        self.image_mode = image_mode
        images = get_images(self.dataset, self.image_mode, None)
        self.predictions = self.model.predict(images)
        # Keep the cached probabilities under a name that differs from the
        # predict_proba method. Assigning them to self.predict_proba would
        # shadow the method on the instance and make it uncallable.
        self.pred_proba = self.model.predict_proba(images)

    def index_predictions(self, index, predictions):
        """Select the entries of the predictions given by the index.

        The index values are used as positions into ``predictions``.

        :param index: The index to use.
        :type index: list
        :param predictions: The predictions to index.
        :type predictions: list or numpy.ndarray
        :return: The selected predictions, as a list when ``predictions``
            is a list, otherwise whatever indexing ``predictions`` with a
            list of positions returns.
        :rtype: list or numpy.ndarray
        """
        if not isinstance(index, list):
            index = list(index)
        if isinstance(predictions, list):
            return [predictions[i] for i in index]
        # Non-list containers (e.g. numpy arrays) are indexed in one step.
        return predictions[index]

    def predict(self, X):
        """Predict the class labels for the provided data.

        :param X: Data to predict the labels for. Only its index is used.
        :type X: pandas.DataFrame
        :return: Predicted class labels.
        :rtype: list
        """
        return self.index_predictions(X.index, self.predictions)

    def predict_proba(self, X):
        """Predict the class probabilities for the provided data.

        :param X: Data to predict the probabilities for. Only its index
            is used.
        :type X: pandas.DataFrame
        :return: Predicted class probabilities.
        :rtype: list[list]
        """
        return self.index_predictions(X.index, self.pred_proba)


class TestSurrogateErrorTree(object):

@pytest.mark.parametrize('analyzer_type', [AnalyzerType.MODEL,
Expand Down Expand Up @@ -319,6 +396,33 @@ def test_invalid_multidim_label(self):
run_error_analyzer(model, X_test, y_test, feature_names,
AnalyzerType.MODEL)

@pytest.mark.skipif(not pytorch_installed,
                    reason="requires torch/torchvision")
def test_surrogate_error_tree_object_detection(self):
    """Run the error analyzer on an object detection model.

    Uses the first three rows of the fridge object detection dataset,
    wraps the model so predictions are looked up by row index, and
    passes a small frame of dummy extracted features as the test data.
    """
    task = ModelTask.OBJECT_DETECTION
    fridge_model = get_object_detection_fridge_model()

    fridge_data = load_fridge_object_detection_dataset().iloc[:3]
    images_df = fridge_data[[ImageColumns.IMAGE]]
    labels_df = fridge_data[[ImageColumns.LABEL]]

    wrapped_model = wrap_model(fridge_model, images_df, task)
    index_model = SimplifiedWrappedIndexPredictorModel(
        wrapped_model, images_df, 'RGB')

    # Dummy extracted features, one row per selected image.
    extracted_features = pd.DataFrame(
        data=[[141, True], [54, False], [212, True]],
        columns=["mean_pixel_value", "is_cool_image"])

    run_error_analyzer(index_model,
                       extracted_features,
                       labels_df,
                       [ImageColumns.IMAGE],
                       AnalyzerType.MODEL,
                       categorical_features=[],
                       model_task=task)


def run_error_analyzer(model, X_test, y_test, feature_names,
analyzer_type, categorical_features=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

# domain mapped session for reuse
_requests_sessions = {}
IMAGE = "image"
LABEL = "label"


def _get_retry_session(url):
Expand Down Expand Up @@ -105,7 +107,6 @@ def get_images(dataset, image_mode, transformations=None):
:return: The images.
:rtype: numpy.ndarray
"""
IMAGE = "image"
IMAGE_URL = "image_url"

column_names = dataset.columns
Expand Down Expand Up @@ -204,11 +205,9 @@ def load_fridge_object_detection_dataset():
labels = load_fridge_object_detection_dataset_labels()

# get all file names into a pandas dataframe with the labels
data = pd.DataFrame(columns=["image", "label"])
rows = []
for i, file in enumerate(os.listdir("./data/odFridgeObjects/" + "images")):
image_path = "./data/odFridgeObjects/" + "images" + "/" + file
data = data.append({"image": image_path,
"label": labels[i]}, # folder
ignore_index=True)

rows.append({IMAGE: image_path, LABEL: labels[i]})
data = pd.DataFrame(rows, columns=[IMAGE, LABEL])
return data
Loading