Skip to content

Commit

Permalink
Fix erroranalysis conversion exception with object detection task for…
Browse files Browse the repository at this point in the history
… complex pred Y and true Y data and numpy>=1.26.0 by specifying object type
  • Loading branch information
imatiach-msft committed Jan 15, 2025
1 parent dfa37be commit c90c654
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 10 deletions.
15 changes: 13 additions & 2 deletions erroranalysis/erroranalysis/_internal/surrogate_error_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
DEFAULT_MIN_CHILD_SAMPLES = 20
CACHED_SUBTREE_FEATURES = 'cache_subtree_features'
LEAF_VALUE = 'leaf_value'
# dtype name handed to np.array as a fallback when converting pred_y or
# true_y directly raises ValueError
OBJECT = 'object'
PREDICTION = 'prediction'
RAW_PREDICTION = 'rawPrediction'
PROBABILITY = 'probability'
Expand Down Expand Up @@ -341,10 +342,20 @@ def get_surrogate_booster_local(filtered_df, analyzer, is_model_analyzer,
diff = pred_y - true_y
if not isinstance(diff, np.ndarray):
diff = np.array(diff)

# Note: if direct conversion fails, for more complex data scenarios like
# object detection we need to convert to an object type for newer versions
# of numpy>=1.26.0
if not isinstance(pred_y, np.ndarray):
pred_y = np.array(pred_y)
try:
pred_y = np.array(pred_y)
except ValueError:
pred_y = np.array(pred_y, dtype=OBJECT)
if not isinstance(true_y, np.ndarray):
true_y = np.array(true_y)
try:
true_y = np.array(true_y)
except ValueError:
true_y = np.array(true_y, dtype=OBJECT)
if is_pandas:
input_data = input_data.to_numpy(copy=True)

Expand Down
106 changes: 104 additions & 2 deletions erroranalysis/tests/test_surrogate_error_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,35 @@
OPERATION, PRED_Y, ROW_INDEX,
SPLIT_FEATURE, SPLIT_INDEX,
TRUE_Y, CohortFilterMethods,
CohortFilterOps, Metrics,
ModelTask, regression_metrics)
CohortFilterOps, ImageColumns,
Metrics, ModelTask, regression_metrics)
from erroranalysis._internal.error_analyzer import (ModelAnalyzer,
PredictionsAnalyzer)
from erroranalysis._internal.surrogate_error_tree import (
TreeSide, cache_subtree_features, compute_error_tree,
create_surrogate_model, get_categorical_info, get_max_split_index,
traverse)
from ml_wrappers import wrap_model
from rai_test_utils.datasets.tabular import (
create_adult_census_data, create_binary_classification_dataset,
create_cancer_data, create_diabetes_data, create_iris_data,
create_simple_titanic_data)
from rai_test_utils.datasets.vision import (
get_images, load_fridge_object_detection_dataset)
from rai_test_utils.models.model_utils import create_models_classification
from rai_test_utils.models.sklearn import (
create_kneighbors_classifier, create_sklearn_random_forest_regressor,
create_titanic_pipeline)
from rai_test_utils.models.torch import get_object_detection_fridge_model
from raiutils.exceptions import UserConfigValidationException

# torch and torchvision are optional: record whether both can be imported
# so that tests depending on them can be skipped when they are missing.
try:
    import torch  # noqa: F401
    import torchvision  # noqa: F401
except ImportError:
    pytorch_installed = False
else:
    pytorch_installed = True

SIZE = 'size'
PARENTID = 'parentId'
ERROR = 'error'
Expand Down Expand Up @@ -65,6 +76,70 @@ def predict(self, X):
return np.zeros((X.shape[0], 1))


class SimplifiedWrappedIndexPredictorModel:
    """Wraps model that uses index to retrieve image data for making
    predictions. Simplified version of the one in responsibleai-vision
    package.

    The wrapped model is evaluated once, on the whole dataset, when the
    wrapper is constructed. ``predict`` and ``predict_proba`` afterwards
    only look up the cached results using the index of the data passed in.
    """

    def __init__(self, model, dataset, image_mode):
        """Initialize the SimplifiedWrappedIndexPredictorModel.

        :param model: The model to wrap.
        :type model: object
        :param dataset: The dataset to use for making predictions.
        :type dataset: pandas.DataFrame
        :param image_mode: The mode to open the image in.
            See pillow documentation for all modes:
            https://pillow.readthedocs.io/en/stable/handbook/concepts.html
        :type image_mode: str
        """
        self.model = model
        self.dataset = dataset
        self.image_mode = image_mode
        test = get_images(self.dataset, self.image_mode, None)
        self.predictions = self.model.predict(test)
        # Stored under a private name on purpose: assigning the results to
        # ``self.predict_proba`` would shadow the ``predict_proba`` method
        # on the instance and make it impossible to call.
        self._predict_proba = self.model.predict_proba(test)

    def index_predictions(self, index, predictions):
        """Index the predictions.

        The index values are used directly as positions into
        ``predictions``.

        :param index: The index to use.
        :type index: list
        :param predictions: The predictions to index.
        :type predictions: list
        :return: The predictions selected by the index.
        :rtype: list
        """
        if not isinstance(index, list):
            index = list(index)
        if isinstance(predictions, list):
            return [predictions[i] for i in index]
        # Non-list containers are indexed with the whole list of positions
        # in a single operation.
        return predictions[index]

    def predict(self, X):
        """Predict the class labels for the provided data.

        :param X: Data to predict the labels for.
        :type X: pandas.DataFrame
        :return: Predicted class labels.
        :rtype: list
        """
        return self.index_predictions(X.index, self.predictions)

    def predict_proba(self, X):
        """Predict the class probabilities for the provided data.

        :param X: Data to predict the probabilities for.
        :type X: pandas.DataFrame
        :return: Predicted class probabilities.
        :rtype: list[list]
        """
        return self.index_predictions(X.index, self._predict_proba)


class TestSurrogateErrorTree(object):

@pytest.mark.parametrize('analyzer_type', [AnalyzerType.MODEL,
Expand Down Expand Up @@ -319,6 +394,33 @@ def test_invalid_multidim_label(self):
run_error_analyzer(model, X_test, y_test, feature_names,
AnalyzerType.MODEL)

@pytest.mark.skipif(not pytorch_installed,
                    reason="requires torch/torchvision")
def test_surrogate_error_tree_object_detection(self):
    """Run the error analyzer on an object detection model whose
    predictions and labels come from the fridge objects dataset."""
    task = ModelTask.OBJECT_DETECTION
    fridge_model = get_object_detection_fridge_model()

    # Only the first three rows of the dataset are used.
    fridge_data = load_fridge_object_detection_dataset().iloc[:3]
    images = fridge_data[[ImageColumns.IMAGE]]
    labels = fridge_data[[ImageColumns.LABEL]]

    # Wrap twice: first with ml_wrappers, then with the index based
    # predictor defined in this module.
    wrapped = wrap_model(fridge_model, images, task)
    index_model = SimplifiedWrappedIndexPredictorModel(
        wrapped, images, 'RGB')

    # Hand-written tabular features, one row per image.
    ext_dataset = pd.DataFrame(
        data=[[141, True], [54, False], [212, True]],
        columns=["mean_pixel_value", "is_cool_image"])

    # NOTE(review): feature_names holds the image column name while
    # ext_dataset carries the two extracted feature columns - confirm
    # this mismatch is intended.
    run_error_analyzer(index_model,
                       ext_dataset,
                       labels,
                       [ImageColumns.IMAGE],
                       AnalyzerType.MODEL,
                       categorical_features=[],
                       model_task=task)


def run_error_analyzer(model, X_test, y_test, feature_names,
analyzer_type, categorical_features=None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

# domain mapped session for reuse
_requests_sessions = {}
# Column names of the data frame built by
# load_fridge_object_detection_dataset
IMAGE = "image"
LABEL = "label"


def _get_retry_session(url):
Expand Down Expand Up @@ -105,7 +107,6 @@ def get_images(dataset, image_mode, transformations=None):
:return: The images.
:rtype: numpy.ndarray
"""
IMAGE = "image"
IMAGE_URL = "image_url"

column_names = dataset.columns
Expand Down Expand Up @@ -204,11 +205,9 @@ def load_fridge_object_detection_dataset():
labels = load_fridge_object_detection_dataset_labels()

# get all file names into a pandas dataframe with the labels
data = pd.DataFrame(columns=["image", "label"])
rows = []
for i, file in enumerate(os.listdir("./data/odFridgeObjects/" + "images")):
image_path = "./data/odFridgeObjects/" + "images" + "/" + file
data = data.append({"image": image_path,
"label": labels[i]}, # folder
ignore_index=True)

rows.append({IMAGE: image_path, LABEL: labels[i]})
data = pd.DataFrame(rows, columns=[IMAGE, LABEL])
return data

0 comments on commit c90c654

Please sign in to comment.