Add README

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
"""Tools for model inspection."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from ._partial_dependence import partial_dependence
from ._permutation_importance import permutation_importance
from ._plot.decision_boundary import DecisionBoundaryDisplay
from ._plot.partial_dependence import PartialDependenceDisplay
# Names re-exported as the public API of the inspection package.
__all__ = [
    "DecisionBoundaryDisplay",
    "PartialDependenceDisplay",
    "partial_dependence",
    "permutation_importance",
]

View File

@@ -0,0 +1,775 @@
"""Partial dependence plots for regression and classification models."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from collections.abc import Iterable
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from ..base import is_classifier, is_regressor
from ..ensemble import RandomForestRegressor
from ..ensemble._gb import BaseGradientBoosting
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
BaseHistGradientBoosting,
)
from ..tree import DecisionTreeRegressor
from ..utils import Bunch, _safe_indexing, check_array
from ..utils._indexing import _determine_key_type, _get_column_indices, _safe_assign
from ..utils._optional_dependencies import check_matplotlib_support # noqa: F401
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
StrOptions,
validate_params,
)
from ..utils._response import _get_response_values
from ..utils.extmath import cartesian
from ..utils.validation import _check_sample_weight, check_is_fitted
from ._pd_utils import _check_feature_names, _get_feature_index
# Public names exported by this module.
__all__ = [
    "partial_dependence",
]
def _grid_from_X(X, percentiles, is_categorical, grid_resolution, custom_values):
    """Generate a grid of points based on the percentiles of X.

    The grid is a cartesian product between the columns of ``values``. The
    ith column of ``values`` consists in ``grid_resolution`` equally-spaced
    points between the percentiles of the ith column of X.

    If ``grid_resolution`` is bigger than the number of unique values in the
    i-th column of X, or if the feature is a categorical feature (by
    inspecting ``is_categorical``), then those unique values will be used
    instead.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_target_features)
        The data.

    percentiles : tuple of float
        The percentiles which are used to construct the extreme values of
        the grid. Must be in [0, 1].

    is_categorical : list of bool
        For each feature, tells whether it is categorical or not. If a feature
        is categorical, then the values used will be the unique ones
        (i.e. categories) instead of the percentiles.

    grid_resolution : int
        The number of equally spaced points to be placed on the grid for each
        feature.

    custom_values : dict
        Mapping from column index of X to an array-like of values where
        the partial dependence should be calculated for that feature.

    Returns
    -------
    grid : ndarray of shape (n_points, n_target_features)
        A value for each feature at each point in the grid. ``n_points`` is
        always ``<= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The values with which the grid has been created. If ``j`` is not in
        ``custom_values``, the size of ``values[j]`` is either
        ``grid_resolution`` or the number of unique values in ``X[:, j]``,
        whichever is smaller. If ``j`` is in ``custom_values``, then
        ``values[j]`` has the length of ``custom_values[j]``.

    Raises
    ------
    ValueError
        If ``percentiles`` is malformed, ``grid_resolution <= 1``, a custom
        grid is not one-dimensional, or a column mixes `np.nan` with `str`
        categories.
    """
    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
        raise ValueError("'percentiles' must be a sequence of 2 elements.")
    if not all(0 <= x <= 1 for x in percentiles):
        raise ValueError("'percentiles' values must be in [0, 1].")
    if percentiles[0] >= percentiles[1]:
        raise ValueError("percentiles[0] must be strictly less than percentiles[1].")
    if grid_resolution <= 1:
        raise ValueError("'grid_resolution' must be strictly greater than 1.")

    def _convert_custom_values(values):
        # Convert custom types such that object types are always used for string arrays
        dtype = object if any(isinstance(v, str) for v in values) else None
        return np.asarray(values, dtype=dtype)

    custom_values = {k: _convert_custom_values(v) for k, v in custom_values.items()}
    if any(v.ndim != 1 for v in custom_values.values()):
        error_string = ", ".join(
            f"Feature {k}: {v.ndim} dimensions"
            for k, v in custom_values.items()
            if v.ndim != 1
        )
        raise ValueError(
            "The custom grid for some features is not a one-dimensional array. "
            f"{error_string}"
        )

    values = []
    # TODO: we should handle missing values (i.e. `np.nan`) specifically and store them
    # in a different Bunch attribute.
    for feature, is_cat in enumerate(is_categorical):
        if feature in custom_values:
            # Use values in the custom range
            axis = custom_values[feature]
        else:
            try:
                uniques = np.unique(_safe_indexing(X, feature, axis=1))
            except TypeError as exc:
                # `np.unique` will fail in the presence of `np.nan` and `str` categories
                # due to sorting. Temporary, we reraise an error explaining the problem.
                raise ValueError(
                    f"The column #{feature} contains mixed data types. Finding unique "
                    "categories fail due to sorting. It usually means that the column "
                    "contains `np.nan` values together with `str` categories. Such use "
                    "case is not yet supported in scikit-learn."
                ) from exc
            if is_cat or uniques.shape[0] < grid_resolution:
                # Use the unique values either because:
                # - feature has low resolution use unique values
                # - feature is categorical
                axis = uniques
            else:
                # create axis based on percentiles and grid resolution
                emp_percentiles = mquantiles(
                    _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
                )
                if np.allclose(emp_percentiles[0], emp_percentiles[1]):
                    raise ValueError(
                        "percentiles are too close to each other, "
                        "unable to build the grid. Please choose percentiles "
                        "that are further apart."
                    )
                axis = np.linspace(
                    emp_percentiles[0],
                    emp_percentiles[1],
                    num=grid_resolution,
                    endpoint=True,
                )
        values.append(axis)

    return cartesian(values), values
def _partial_dependence_recursion(est, grid, features):
"""Calculate partial dependence via the recursion method.
The recursion method is in particular enabled for tree-based estimators.
For each `grid` value, a weighted tree traversal is performed: if a split node
involves an input feature of interest, the corresponding left or right branch
is followed; otherwise both branches are followed, each branch being weighted
by the fraction of training samples that entered that branch. Finally, the
partial dependence is given by a weighted average of all the visited leaves
values.
This method is more efficient in terms of speed than the `'brute'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_brute`).
However, here, the partial dependence computation is done explicitly with the
`X` used during training of `est`.
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict` or
:term:`decision_function`. Multioutput-multiclass classifiers are not
supported. Note that `'recursion'` is only supported for some tree-based
estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
).
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features you are doing partial dependence at.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
"""
averaged_predictions = est._compute_partial_dependence_recursion(grid, features)
if averaged_predictions.ndim == 1:
# reshape to (1, n_points) for consistency with
# _partial_dependence_brute
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
def _partial_dependence_brute(
    est, grid, features, X, response_method, sample_weight=None
):
    """Calculate partial dependence via the brute force method.

    The brute method explicitly averages the predictions of an estimator over a
    grid of feature values.

    For each `grid` value, all the samples from `X` have their variables of
    interest replaced by that specific `grid` value. The predictions are then made
    and averaged across the samples.

    This method is slower than the `'recursion'`
    (:func:`~sklearn.inspection._partial_dependence._partial_dependence_recursion`)
    version for estimators with this second option. However, with the `'brute'`
    force method, the average will be done with the given `X` and not the `X`
    used during training, as it is done in the `'recursion'` version. Therefore
    the average can always accept `sample_weight` (even when the estimator was
    fitted without).

    Parameters
    ----------
    est : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    grid : array-like of shape (n_points, n_target_features)
        The grid of feature values for which the partial dependence is calculated.
        Note that `n_points` is the number of points in the grid and `n_target_features`
        is the number of features you are doing partial dependence at.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    X : array-like of shape (n_samples, n_features)
        `X` is used to generate values for the complement features. That is, for
        each value in `grid`, the method will average the prediction of each
        sample from `X` having that grid value for `features`.

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. Note that
        `sample_weight` does not change the individual predictions.

    Returns
    -------
    averaged_predictions : array-like of shape (n_targets, n_points)
        The averaged predictions for the given `grid` of features values.
        Note that `n_targets` is the number of targets (e.g. 1 for binary
        classification, `n_tasks` for multi-output regression, and `n_classes` for
        multiclass classification) and `n_points` is the number of points in the `grid`.

    predictions : array-like
        The predictions for the given `grid` of features values over the samples
        from `X`. For non-multioutput regression and binary classification the
        shape is `(n_instances, n_points)` and for multi-output regression and
        multiclass classification the shape is `(n_targets, n_instances, n_points)`,
        where `n_targets` is the number of targets (`n_tasks` for multi-output
        regression, and `n_classes` for multiclass classification), `n_instances`
        is the number of instances in `X`, and `n_points` is the number of points
        in the `grid`.
    """
    predictions = []
    averaged_predictions = []

    # Resolve 'auto': regressors use `predict`; classifiers try
    # `predict_proba` first and fall back to `decision_function`.
    if response_method == "auto":
        response_method = (
            "predict" if is_regressor(est) else ["predict_proba", "decision_function"]
        )

    # Work on a copy so the caller's X is never mutated by _safe_assign below.
    X_eval = X.copy()
    for new_values in grid:
        for i, variable in enumerate(features):
            _safe_assign(X_eval, new_values[i], column_indexer=variable)

        # Note: predictions is of shape
        # (n_points,) for non-multioutput regressors
        # (n_points, n_tasks) for multioutput regressors
        # (n_points, 1) for the regressors in cross_decomposition (I think)
        # (n_points, 1) for binary classification (positive class already selected)
        # (n_points, n_classes) for multiclass classification
        pred, _ = _get_response_values(est, X_eval, response_method=response_method)

        predictions.append(pred)
        # average over samples
        averaged_predictions.append(np.average(pred, axis=0, weights=sample_weight))

    n_samples = X.shape[0]

    # reshape to (n_targets, n_instances, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    predictions = np.array(predictions).T
    if is_regressor(est) and predictions.ndim == 2:
        # non-multioutput regression, shape is (n_instances, n_points,)
        predictions = predictions.reshape(n_samples, -1)
    elif is_classifier(est) and predictions.shape[0] == 2:
        # Binary classification, shape is (2, n_instances, n_points).
        # we output the effect of **positive** class
        predictions = predictions[1]
        predictions = predictions.reshape(n_samples, -1)

    # reshape averaged_predictions to (n_targets, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    averaged_predictions = np.array(averaged_predictions).T
    if averaged_predictions.ndim == 1:
        # reshape to (1, n_points) for consistency with
        # _partial_dependence_recursion
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions, predictions
@validate_params(
    {
        "estimator": [
            HasMethods(["fit", "predict"]),
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
        ],
        "X": ["array-like", "sparse matrix"],
        "features": ["array-like", Integral, str],
        "sample_weight": ["array-like", None],
        "categorical_features": ["array-like", None],
        "feature_names": ["array-like", None],
        "response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
        "percentiles": [tuple],
        "grid_resolution": [Interval(Integral, 1, None, closed="left")],
        "method": [StrOptions({"auto", "recursion", "brute"})],
        "kind": [StrOptions({"average", "individual", "both"})],
        "custom_values": [dict, None],
    },
    prefer_skip_nested_validation=True,
)
def partial_dependence(
    estimator,
    X,
    features,
    *,
    sample_weight=None,
    categorical_features=None,
    feature_names=None,
    response_method="auto",
    percentiles=(0.05, 0.95),
    grid_resolution=100,
    custom_values=None,
    method="auto",
    kind="average",
):
    """Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in
    :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
    and the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        `'recursion'` method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as `'brute'` up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for `'recursion'` because the
        offset will be sample-dependent. It is preferable to use the `'brute'`
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like, sparse matrix or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str, bool} or int or str
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. If
        `sample_weight` is not `None`, then `method` will be set to `'brute'`.
        Note that `sample_weight` is ignored for `kind='individual'`.

        .. versionadded:: 1.3

    categorical_features : array-like of shape (n_features,) or shape \
            (n_categorical_features,), dtype={bool, int, str}, default=None
        Indicates the categorical features.

        - `None`: no feature will be considered categorical;
        - boolean array-like: boolean mask of shape `(n_features,)`
          indicating which features are categorical. Thus, this array has
          the same shape has `X.shape[1]`;
        - integer or string array-like: integer indices or strings
          indicating categorical features.

        .. versionadded:: 1.2

    feature_names : array-like of shape (n_features,), dtype=str, default=None
        Name of each feature; `feature_names[i]` holds the name of the feature
        with index `i`.
        By default, the name of the feature corresponds to their numerical
        index for NumPy array and their column name for pandas dataframe.

        .. versionadded:: 1.2

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].
        This parameter is overridden by `custom_values` if that parameter is set.

    grid_resolution : int, default=100
        The number of equally spaced points on the grid, for each target
        feature.
        This parameter is overridden by `custom_values` if that parameter is set.

    custom_values : dict
        A dictionary mapping the index of an element of `features` to an array
        of values where the partial dependence should be calculated
        for that feature. Setting a range of values for a feature overrides
        `grid_resolution` and `percentiles`.

        See :ref:`how to use partial_dependence
        <plt_partial_dependence_custom_values>` for an example of how this parameter can
        be used.

        .. versionadded:: 1.7

    method : {'auto', 'recursion', 'brute'}, default='auto'
        The method used to calculate the averaged predictions:

        - `'recursion'` is only supported for some tree-based estimators
          (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`,
          ) when `kind='average'`.
          This is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities. Since the `'recursion'` method implicitly computes
          the average of the Individual Conditional Expectation (ICE) by
          design, it is not compatible with ICE and thus `kind` must be
          `'average'`.

        - `'brute'` is supported for any estimator, but is more
          computationally intensive.

        - `'auto'`: the `'recursion'` is used for estimators that support it,
          and `'brute'` is used otherwise. If `sample_weight` is not `None`,
          then `'brute'` is used regardless of the estimator.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the `'brute'` and `'recursion'` method.

    kind : {'average', 'individual', 'both'}, default='average'
        Whether to return the partial dependence averaged across all the
        samples in the dataset or one value per sample or both.
        See Returns below.

        Note that the fast `method='recursion'` option is only available for
        `kind='average'` and `sample_weights=None`. Computing individual
        dependencies and doing weighted averages requires using the slower
        `method='brute'`.

        .. versionadded:: 0.24

    Returns
    -------
    predictions : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        individual : ndarray of shape (n_outputs, n_instances, \
                len(values[0]), len(values[1]), ...)
            The predictions for all the points in the grid for all
            samples in X. This is also known as Individual
            Conditional Expectation (ICE).
            Only available when `kind='individual'` or `kind='both'`.

        average : ndarray of shape (n_outputs, len(values[0]), \
                len(values[1]), ...)
            The predictions for all the points in the grid, averaged
            over all samples in X (or over the training data if
            `method` is 'recursion').
            Only available when `kind='average'` or `kind='both'`.

        grid_values : seq of 1d ndarrays
            The values with which the grid has been created. The generated
            grid is a cartesian product of the arrays in `grid_values` where
            `len(grid_values) == len(features)`. The size of each array
            `grid_values[j]` is either `grid_resolution`, or the number of
            unique values in `X[:, j]`, whichever is smaller.

            .. versionadded:: 1.3

        `n_outputs` corresponds to the number of classes in a multi-class
        setting, or to the number of tasks for multi-output regression.
        For classical regression and binary classification `n_outputs==1`.
        `n_values_feature_j` corresponds to the size `grid_values[j]`.

    See Also
    --------
    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
    PartialDependenceDisplay : Partial Dependence visualization.

    Examples
    --------
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2)  # doctest: +SKIP
    (array([[-4.52, 4.52]]), [array([ 0., 1.])])
    """
    check_is_fitted(estimator)

    if not (is_classifier(estimator) or is_regressor(estimator)):
        raise ValueError("'estimator' must be a fitted regressor or classifier.")

    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
        raise ValueError("Multiclass-multioutput estimators are not supported")

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert DataFrame into a NumPy array.
    if not (hasattr(X, "__array__") or sparse.issparse(X)):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=object)

    if is_regressor(estimator) and response_method != "auto":
        raise ValueError(
            "The response_method parameter is ignored for regressors and "
            "must be 'auto'."
        )

    if kind != "average":
        if method == "recursion":
            raise ValueError(
                "The 'recursion' method only applies when 'kind' is set to 'average'"
            )
        # ICE requires per-sample predictions, which only 'brute' produces.
        method = "brute"

    if method == "recursion" and sample_weight is not None:
        raise ValueError(
            "The 'recursion' method can only be applied when sample_weight is None."
        )

    if method == "auto":
        if sample_weight is not None:
            # Weighted averaging is only possible with explicit predictions.
            method = "brute"
        elif isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
            method = "recursion"
        elif isinstance(
            estimator,
            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
        ):
            method = "recursion"
        else:
            method = "brute"

    if method == "recursion":
        if not isinstance(
            estimator,
            (
                BaseGradientBoosting,
                BaseHistGradientBoosting,
                DecisionTreeRegressor,
                RandomForestRegressor,
            ),
        ):
            # Keep this listing in sync with the isinstance check above.
            supported_classes_recursion = (
                "GradientBoostingClassifier",
                "GradientBoostingRegressor",
                "HistGradientBoostingClassifier",
                "HistGradientBoostingRegressor",
                "DecisionTreeRegressor",
                "RandomForestRegressor",
            )
            raise ValueError(
                "Only the following estimators support the 'recursion' "
                "method: {}. Try using method='brute'.".format(
                    ", ".join(supported_classes_recursion)
                )
            )
        if response_method == "auto":
            response_method = "decision_function"

        if response_method != "decision_function":
            raise ValueError(
                "With the 'recursion' method, the response_method must be "
                "'decision_function'. Got {}.".format(response_method)
            )

    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    if _determine_key_type(features, accept_slice=False) == "int":
        # _get_column_indices() supports negative indexing. Here, we limit
        # the indexing to be positive. The upper bound will be checked
        # by _get_column_indices()
        if np.any(np.less(features, 0)):
            raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))

    features_indices = np.asarray(
        _get_column_indices(X, features), dtype=np.intp, order="C"
    ).ravel()

    feature_names = _check_feature_names(X, feature_names)

    n_features = X.shape[1]
    if categorical_features is None:
        is_categorical = [False] * len(features_indices)
    else:
        categorical_features = np.asarray(categorical_features)
        if categorical_features.size == 0:
            raise ValueError(
                "Passing an empty list (`[]`) to `categorical_features` is not "
                "supported. Use `None` instead to indicate that there are no "
                "categorical features."
            )
        if categorical_features.dtype.kind == "b":
            # categorical features provided as a list of boolean
            if categorical_features.size != n_features:
                raise ValueError(
                    "When `categorical_features` is a boolean array-like, "
                    "the array should be of shape (n_features,). Got "
                    f"{categorical_features.size} elements while `X` contains "
                    f"{n_features} features."
                )
            is_categorical = [categorical_features[idx] for idx in features_indices]
        elif categorical_features.dtype.kind in ("i", "O", "U"):
            # categorical features provided as a list of indices or feature names
            categorical_features_idx = [
                _get_feature_index(cat, feature_names=feature_names)
                for cat in categorical_features
            ]
            is_categorical = [
                idx in categorical_features_idx for idx in features_indices
            ]
        else:
            raise ValueError(
                "Expected `categorical_features` to be an array-like of boolean,"
                f" integer, or string. Got {categorical_features.dtype} instead."
            )

    custom_values = custom_values or {}
    if isinstance(features, (str, int)):
        features = [features]

    for feature_idx, feature, is_cat in zip(features_indices, features, is_categorical):
        if is_cat:
            continue

        if _safe_indexing(X, feature_idx, axis=1).dtype.kind in "iu":
            # TODO(1.9): raise a ValueError instead.
            warnings.warn(
                f"The column {feature!r} contains integer data. Partial "
                "dependence plots are not supported for integer data: this "
                "can lead to implicit rounding with NumPy arrays or even errors "
                "with newer pandas versions. Please convert numerical features "
                "to floating point dtypes ahead of time to avoid problems. "
                "This will raise ValueError in scikit-learn 1.9.",
                FutureWarning,
            )
            # Do not warn again for other features to avoid spamming the caller.
            break

    X_subset = _safe_indexing(X, features_indices, axis=1)

    # `custom_values` is keyed by elements of `features`; re-key it by the
    # positional index of each feature within `X_subset`.
    custom_values_for_X_subset = {
        index: custom_values.get(feature)
        for index, feature in enumerate(features)
        if feature in custom_values
    }

    grid, values = _grid_from_X(
        X_subset,
        percentiles,
        is_categorical,
        grid_resolution,
        custom_values_for_X_subset,
    )

    if method == "brute":
        averaged_predictions, predictions = _partial_dependence_brute(
            estimator, grid, features_indices, X, response_method, sample_weight
        )

        # reshape predictions to
        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
        predictions = predictions.reshape(
            -1, X.shape[0], *[val.shape[0] for val in values]
        )
    else:
        averaged_predictions = _partial_dependence_recursion(
            estimator, grid, features_indices
        )

    # reshape averaged_predictions to
    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(
        -1, *[val.shape[0] for val in values]
    )

    pdp_results = Bunch(grid_values=values)

    if kind == "average":
        pdp_results["average"] = averaged_predictions
    elif kind == "individual":
        pdp_results["individual"] = predictions
    else:  # kind='both'
        pdp_results["average"] = averaged_predictions
        pdp_results["individual"] = predictions

    return pdp_results

View File

@@ -0,0 +1,68 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
def _check_feature_names(X, feature_names=None):
"""Check feature names.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data.
feature_names : None or array-like of shape (n_names,), dtype=str
Feature names to check or `None`.
Returns
-------
feature_names : list of str
Feature names validated. If `feature_names` is `None`, then a list of
feature names is provided, i.e. the column names of a pandas dataframe
or a generic list of feature names (e.g. `["x0", "x1", ...]`) for a
NumPy array.
"""
if feature_names is None:
if hasattr(X, "columns") and hasattr(X.columns, "tolist"):
# get the column names for a pandas dataframe
feature_names = X.columns.tolist()
else:
# define a list of numbered indices for a numpy array
feature_names = [f"x{i}" for i in range(X.shape[1])]
elif hasattr(feature_names, "tolist"):
# convert numpy array or pandas index to a list
feature_names = feature_names.tolist()
if len(set(feature_names)) != len(feature_names):
raise ValueError("feature_names should not contain duplicates.")
return feature_names
def _get_feature_index(fx, feature_names=None):
"""Get feature index.
Parameters
----------
fx : int or str
Feature index or name.
feature_names : list of str, default=None
All feature names from which to search the indices.
Returns
-------
idx : int
Feature index.
"""
if isinstance(fx, str):
if feature_names is None:
raise ValueError(
f"Cannot plot partial dependence for feature {fx!r} since "
"the list of feature names was not provided, neither as "
"column names of a pandas data-frame nor via the feature_names "
"parameter."
)
try:
return feature_names.index(fx)
except ValueError as e:
raise ValueError(f"Feature {fx!r} not in feature_names") from e
return fx

View File

@@ -0,0 +1,313 @@
"""Permutation importance for estimators."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numbers
import numpy as np
from ..ensemble._bagging import _generate_indices
from ..metrics import check_scoring, get_scorer_names
from ..model_selection._validation import _aggregate_score_dicts
from ..utils import Bunch, _safe_indexing, check_array, check_random_state
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
RealNotInt,
StrOptions,
validate_params,
)
from ..utils.parallel import Parallel, delayed
def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)
def _calculate_permutation_scores(
    estimator,
    X,
    y,
    sample_weight,
    col_idx,
    random_state,
    n_repeats,
    scorer,
    max_samples,
):
    """Score `estimator` with column `col_idx` of `X` randomly permuted.

    Returns an ndarray of shape (n_repeats,), or a dict of such arrays for
    multi-metric scorers.
    """
    rng = check_random_state(random_state)

    # Operate on a private copy of X. This keeps the shuffle thread-safe
    # under threading-based parallelism, and also guarantees a writable
    # structure: with the 'loky' (default) or 'multiprocessing' joblib
    # backends a large X may arrive as a read-only memmap, whereas a copy is
    # always writable so its columns can be shuffled in place.
    if max_samples < X.shape[0]:
        # Evaluate on a random subsample (drawn without replacement) to keep
        # the computation tractable on large datasets.
        row_indices = _generate_indices(
            random_state=rng,
            bootstrap=False,
            n_population=X.shape[0],
            n_samples=max_samples,
        )
        X_permuted = _safe_indexing(X, row_indices, axis=0)
        y = _safe_indexing(y, row_indices, axis=0)
        if sample_weight is not None:
            sample_weight = _safe_indexing(sample_weight, row_indices, axis=0)
    else:
        X_permuted = X.copy()

    shuffling_idx = np.arange(X_permuted.shape[0])
    scores = []
    for _ in range(n_repeats):
        rng.shuffle(shuffling_idx)
        if hasattr(X_permuted, "iloc"):
            # pandas path: realign the shuffled column on the original index
            # so label-based assignment does not undo the permutation.
            col = X_permuted.iloc[shuffling_idx, col_idx]
            col.index = X_permuted.index
            X_permuted[X_permuted.columns[col_idx]] = col
        else:
            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
        scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))

    if isinstance(scores[0], dict):
        # Multi-metric scorer: merge the per-repeat dicts into dict-of-arrays.
        return _aggregate_score_dicts(scores)
    return np.array(scores)
def _create_importances_bunch(baseline_score, permuted_score):
    """Compute the importances as the decrease in score.

    Parameters
    ----------
    baseline_score : ndarray of shape (n_features,)
        The baseline score without permutation.
    permuted_score : ndarray of shape (n_features, n_repeats)
        The permuted scores for the `n` repetitions.

    Returns
    -------
    importances : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.
        importances_mean : ndarray, shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray, shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray, shape (n_features, n_repeats)
            Raw permutation importance scores.
    """
    # Importance of a feature = drop in score caused by permuting it.
    importances = baseline_score - permuted_score
    return Bunch(
        importances=importances,
        importances_mean=importances.mean(axis=1),
        importances_std=importances.std(axis=1),
    )
@validate_params(
    {
        "estimator": [HasMethods(["fit"])],
        "X": ["array-like"],
        "y": ["array-like", None],
        "scoring": [
            StrOptions(set(get_scorer_names())),
            callable,
            list,
            tuple,
            dict,
            None,
        ],
        "n_repeats": [Interval(Integral, 1, None, closed="left")],
        "n_jobs": [Integral, None],
        "random_state": ["random_state"],
        "sample_weight": ["array-like", None],
        "max_samples": [
            Interval(Integral, 1, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="right"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def permutation_importance(
    estimator,
    X,
    y,
    *,
    scoring=None,
    n_repeats=5,
    n_jobs=None,
    random_state=None,
    sample_weight=None,
    max_samples=1.0,
):
    """Permutation importance for feature evaluation [BRE]_.
    The :term:`estimator` is required to be a fitted estimator. `X` can be the
    data set used to train the estimator or a hold-out set. The permutation
    importance of a feature is calculated as follows. First, a baseline metric,
    defined by :term:`scoring`, is evaluated on a (potentially different)
    dataset defined by the `X`. Next, a feature column from the validation set
    is permuted and the metric is evaluated again. The permutation importance
    is defined to be the difference between the baseline metric and metric from
    permutating the feature column.
    Read more in the :ref:`User Guide <permutation_importance>`.
    Parameters
    ----------
    estimator : object
        An estimator that has already been :term:`fitted` and is compatible
        with :term:`scorer`.
    X : ndarray or DataFrame, shape (n_samples, n_features)
        Data on which permutation importance will be computed.
    y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
        Targets for supervised or `None` for unsupervised.
    scoring : str, callable, list, tuple, or dict, default=None
        Scorer to use.
        If `scoring` represents a single score, one can use:
        - str: see :ref:`scoring_string_names` for options.
        - callable: a scorer callable object (e.g., function) with signature
          ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
        - `None`: the `estimator`'s
          :ref:`default evaluation criterion <scoring_api_overview>` is used.
        If `scoring` represents multiple scores, one can use:
        - a list or tuple of unique strings;
        - a callable returning a dictionary where the keys are the metric
          names and the values are the metric scores;
        - a dictionary with metric names as keys and callables a values.
        Passing multiple scores to `scoring` is more efficient than calling
        `permutation_importance` for each of the scores as it reuses
        predictions to avoid redundant computation.
    n_repeats : int, default=5
        Number of times to permute a feature.
    n_jobs : int or None, default=None
        Number of jobs to run in parallel. The computation is done by computing
        permutation score for each columns and parallelized over the columns.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    random_state : int, RandomState instance, default=None
        Pseudo-random number generator to control the permutations of each
        feature.
        Pass an int to get reproducible results across function calls.
        See :term:`Glossary <random_state>`.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights used in scoring.
        .. versionadded:: 0.24
    max_samples : int or float, default=1.0
        The number of samples to draw from X to compute feature importance
        in each repeat (without replacement).
        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.
        - If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples
          will be used.
        While using this option may provide less accurate importance estimates,
        it keeps the method tractable when evaluating feature importance on
        large datasets. In combination with `n_repeats`, this allows to control
        the computational speed vs statistical accuracy trade-off of this method.
        .. versionadded:: 1.0
    Returns
    -------
    result : :class:`~sklearn.utils.Bunch` or dict of such instances
        Dictionary-like object, with the following attributes.
        importances_mean : ndarray of shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray of shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray of shape (n_features, n_repeats)
            Raw permutation importance scores.
        If there are multiple scoring metrics in the scoring parameter
        `result` is a dict with scorer names as keys (e.g. 'roc_auc') and
        `Bunch` objects like above as values.
    References
    ----------
    .. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
        2001. <10.1023/A:1010933404324>`
    Examples
    --------
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.inspection import permutation_importance
    >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
    ...      [0, 9, 9],[0, 9, 9],[0, 9, 9]]
    >>> y = [1, 1, 1, 0, 0, 0]
    >>> clf = LogisticRegression().fit(X, y)
    >>> result = permutation_importance(clf, X, y, n_repeats=10,
    ...                                 random_state=0)
    >>> result.importances_mean
    array([0.4666, 0.       , 0.       ])
    >>> result.importances_std
    array([0.2211, 0.       , 0.       ])
    """
    # Keep dataframes untouched (positional pandas indexing is used in
    # _calculate_permutation_scores); otherwise validate as an array, allowing
    # NaN values and performing no dtype conversion (dtype=None).
    if not hasattr(X, "iloc"):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=None)
    # Precompute random seed from the random state to be used
    # to get a fresh independent RandomState instance for each
    # parallel call to _calculate_permutation_scores, irrespective of
    # the fact that variables are shared or not depending on the active
    # joblib backend (sequential, thread-based or process-based).
    random_state = check_random_state(random_state)
    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
    # A float `max_samples` in (0, 1] (enforced by validate_params) is a
    # fraction of the number of rows; an int must not exceed n_samples.
    if not isinstance(max_samples, numbers.Integral):
        max_samples = int(max_samples * X.shape[0])
    elif max_samples > X.shape[0]:
        raise ValueError("max_samples must be <= n_samples")
    scorer = check_scoring(estimator, scoring=scoring)
    # Reference score on the unpermuted data.
    baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
    # One parallel job per column; each job receives the same integer seed so
    # results do not depend on the joblib backend in use.
    scores = Parallel(n_jobs=n_jobs)(
        delayed(_calculate_permutation_scores)(
            estimator,
            X,
            y,
            sample_weight,
            col_idx,
            random_seed,
            n_repeats,
            scorer,
            max_samples,
        )
        for col_idx in range(X.shape[1])
    )
    # Multi-metric scoring yields a dict of Bunches, one per metric name.
    if isinstance(baseline_score, dict):
        return {
            name: _create_importances_bunch(
                baseline_score[name],
                # unpack the permuted scores
                np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
            )
            for name in baseline_score
        }
    else:
        return _create_importances_bunch(baseline_score, np.array(scores))

View File

@@ -0,0 +1,2 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,564 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
import numpy as np
from ...base import is_regressor
from ...preprocessing import LabelEncoder
from ...utils import _safe_indexing
from ...utils._optional_dependencies import check_matplotlib_support
from ...utils._response import _get_response_values
from ...utils._set_output import _get_adapter_from_container
from ...utils.validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df,
_num_features,
check_is_fitted,
)
def _check_boundary_response_method(estimator, response_method, class_of_interest):
    """Validate the response methods to be used with the fitted estimator.

    Parameters
    ----------
    estimator : object
        Fitted estimator to check.
    response_method : {'auto', 'decision_function', 'predict_proba', 'predict'}
        Specifies whether to use :term:`decision_function`,
        :term:`predict_proba`, :term:`predict` as the target response. If set
        to 'auto', the response method is tried in the before mentioned order.
    class_of_interest : int, float, bool, str or None
        The class considered when plotting the decision. Cannot be None if
        multiclass and `response_method` is 'predict_proba' or
        'decision_function'.

        .. versionadded:: 1.4

    Returns
    -------
    prediction_method : list of str or str
        The name or list of names of the response methods to use.
    """
    # An array-like first class label is the signature of multi-label /
    # multi-output multi-class classifiers, which are not supported here.
    if hasattr(estimator, "classes_") and _is_arraylike_not_scalar(
        estimator.classes_[0]
    ):
        raise ValueError(
            "Multi-label and multi-output multi-class classifiers are not supported"
        )
    # An explicit response method is passed through unchanged.
    if response_method != "auto":
        return response_method
    if is_regressor(estimator):
        return "predict"
    # Classifiers: try the richer response methods first.
    return ["decision_function", "predict_proba", "predict"]
class DecisionBoundaryDisplay:
    """Decisions boundary visualization.
    It is recommended to use
    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
    attributes.
    Read more in the :ref:`User Guide <visualizations>`.
    For a detailed example comparing the decision boundaries of multinomial and
    one-vs-rest logistic regression, please see
    :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py`.
    .. versionadded:: 1.1
    Parameters
    ----------
    xx0 : ndarray of shape (grid_resolution, grid_resolution)
        First output of :func:`meshgrid <numpy.meshgrid>`.
    xx1 : ndarray of shape (grid_resolution, grid_resolution)
        Second output of :func:`meshgrid <numpy.meshgrid>`.
    response : ndarray of shape (grid_resolution, grid_resolution) or \
            (grid_resolution, grid_resolution, n_classes)
        Values of the response function.
    multiclass_colors : list of str or str, default=None
        Specifies how to color each class when plotting all classes of multiclass
        problem. Ignored for binary problems and multiclass problems when plotting a
        single prediction value per point.
        Possible inputs are:
        * list: list of Matplotlib
          `color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
          strings, of length `n_classes`
        * str: name of :class:`matplotlib.colors.Colormap`
        * None: 'viridis' colormap is used to sample colors
        Single color colormaps will be generated from the colors in the list or
        colors taken from the colormap and passed to the `cmap` parameter of
        the `plot_method`.
        .. versionadded:: 1.7
    xlabel : str, default=None
        Default label to place on x axis.
    ylabel : str, default=None
        Default label to place on y axis.
    Attributes
    ----------
    surface_ : matplotlib `QuadContourSet` or `QuadMesh` or list of such objects
        If `plot_method` is 'contour' or 'contourf', `surface_` is
        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
        `plot_method` is 'pcolormesh', `surface_` is
        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.
    multiclass_colors_ : array of shape (n_classes, 4)
        Colors used to plot each class in multiclass problems.
        Only defined when `color_of_interest` is None.
        .. versionadded:: 1.7
    ax_ : matplotlib Axes
        Axes with decision boundary.
    figure_ : matplotlib Figure
        Figure containing the decision boundary.
    See Also
    --------
    DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.
    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.inspection import DecisionBoundaryDisplay
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> iris = load_iris()
    >>> feature_1, feature_2 = np.meshgrid(
    ...     np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),
    ...     np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())
    ... )
    >>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T
    >>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)
    >>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)
    >>> display = DecisionBoundaryDisplay(
    ...     xx0=feature_1, xx1=feature_2, response=y_pred
    ... )
    >>> display.plot()
    <...>
    >>> display.ax_.scatter(
    ...     iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor="black"
    ... )
    <...>
    >>> plt.show()
    """
    def __init__(
        self, *, xx0, xx1, response, multiclass_colors=None, xlabel=None, ylabel=None
    ):
        # Only store the precomputed grid and response; all rendering is
        # deferred to `plot`.
        self.xx0 = xx0
        self.xx1 = xx1
        self.response = response
        self.multiclass_colors = multiclass_colors
        self.xlabel = xlabel
        self.ylabel = ylabel
    def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
        """Plot visualization.
        Parameters
        ----------
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.
        xlabel : str, default=None
            Overwrite the x-axis label.
        ylabel : str, default=None
            Overwrite the y-axis label.
        **kwargs : dict
            Additional keyword arguments to be passed to the `plot_method`.
        Returns
        -------
        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores computed values.
        """
        check_matplotlib_support("DecisionBoundaryDisplay.plot")
        import matplotlib as mpl
        import matplotlib.pyplot as plt
        if plot_method not in ("contourf", "contour", "pcolormesh"):
            raise ValueError(
                "plot_method must be 'contourf', 'contour', or 'pcolormesh'. "
                f"Got {plot_method} instead."
            )
        if ax is None:
            _, ax = plt.subplots()
        plot_func = getattr(ax, plot_method)
        if self.response.ndim == 2:
            # Single response value per grid point: draw the surface directly.
            self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)
        else:  # self.response.ndim == 3
            # One response column per class (multiclass predict_proba or
            # decision_function); colors come from `multiclass_colors`.
            n_responses = self.response.shape[-1]
            for kwarg in ("cmap", "colors"):
                if kwarg in kwargs:
                    warnings.warn(
                        f"'{kwarg}' is ignored in favor of 'multiclass_colors' "
                        "in the multiclass case when the response method is "
                        "'decision_function' or 'predict_proba'."
                    )
                    del kwargs[kwarg]
            if self.multiclass_colors is None or isinstance(
                self.multiclass_colors, str
            ):
                if self.multiclass_colors is None:
                    cmap = "tab10" if n_responses <= 10 else "gist_rainbow"
                else:
                    cmap = self.multiclass_colors
                # Special case for the tab10 and tab20 colormaps that encode a
                # discrete set of colors that are easily distinguishable
                # contrary to other colormaps that are continuous.
                if cmap == "tab10" and n_responses <= 10:
                    colors = plt.get_cmap("tab10", 10).colors[:n_responses]
                elif cmap == "tab20" and n_responses <= 20:
                    colors = plt.get_cmap("tab20", 20).colors[:n_responses]
                else:
                    cmap = plt.get_cmap(cmap, n_responses)
                    if not hasattr(cmap, "colors"):
                        # For LinearSegmentedColormap
                        colors = cmap(np.linspace(0, 1, n_responses))
                    else:
                        colors = cmap.colors
            elif isinstance(self.multiclass_colors, list):
                colors = [mpl.colors.to_rgba(color) for color in self.multiclass_colors]
            else:
                raise ValueError("'multiclass_colors' must be a list or a str.")
            self.multiclass_colors_ = colors
            if plot_method == "contour":
                # Plot only argmax map for contour
                class_map = self.response.argmax(axis=2)
                self.surface_ = plot_func(
                    self.xx0, self.xx1, class_map, colors=colors, **kwargs
                )
            else:
                # Build one white-to-color colormap per class, then draw each
                # class's response only where that class is the argmax (all
                # other grid points are masked out).
                multiclass_cmaps = [
                    mpl.colors.LinearSegmentedColormap.from_list(
                        f"colormap_{class_idx}", [(1.0, 1.0, 1.0, 1.0), (r, g, b, 1.0)]
                    )
                    for class_idx, (r, g, b, _) in enumerate(colors)
                ]
                self.surface_ = []
                for class_idx, cmap in enumerate(multiclass_cmaps):
                    response = np.ma.array(
                        self.response[:, :, class_idx],
                        mask=~(self.response.argmax(axis=2) == class_idx),
                    )
                    self.surface_.append(
                        plot_func(self.xx0, self.xx1, response, cmap=cmap, **kwargs)
                    )
        # Only overwrite an axis label when the caller supplied one or the
        # axis does not have a label yet.
        if xlabel is not None or not ax.get_xlabel():
            xlabel = self.xlabel if xlabel is None else xlabel
            ax.set_xlabel(xlabel)
        if ylabel is not None or not ax.get_ylabel():
            ylabel = self.ylabel if ylabel is None else ylabel
            ax.set_ylabel(ylabel)
        self.ax_ = ax
        self.figure_ = ax.figure
        return self
    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        *,
        grid_resolution=100,
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
        class_of_interest=None,
        multiclass_colors=None,
        xlabel=None,
        ylabel=None,
        ax=None,
        **kwargs,
    ):
        """Plot decision boundary given an estimator.
        Read more in the :ref:`User Guide <visualizations>`.
        Parameters
        ----------
        estimator : object
            Trained estimator used to plot the decision boundary.
        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
            Input data that should be only 2-dimensional.
        grid_resolution : int, default=100
            Number of grid points to use for plotting decision boundary.
            Higher values will make the plot look nicer but be slower to
            render.
        eps : float, default=1.0
            Extends the minimum and maximum values of X for evaluating the
            response function.
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
        response_method : {'auto', 'decision_function', 'predict_proba', \
                'predict'}, default='auto'
            Specifies whether to use :term:`decision_function`,
            :term:`predict_proba` or :term:`predict` as the target response.
            If set to 'auto', the response method is tried in the order as
            listed above.
            .. versionchanged:: 1.6
                For multiclass problems, 'auto' no longer defaults to 'predict'.
        class_of_interest : int, float, bool or str, default=None
            The class to be plotted when `response_method` is 'predict_proba'
            or 'decision_function'. If None, `estimator.classes_[1]` is considered
            the positive class for binary classifiers. For multiclass
            classifiers, if None, all classes will be represented in the
            decision boundary plot; the class with the highest response value
            at each point is plotted. The color of each class can be set via
            `multiclass_colors`.
            .. versionadded:: 1.4
        multiclass_colors : list of str, or str, default=None
            Specifies how to color each class when plotting multiclass
            'predict_proba' or 'decision_function' and `class_of_interest` is
            None. Ignored in all other cases.
            Possible inputs are:
            * list: list of Matplotlib
              `color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
              strings, of length `n_classes`
            * str: name of :class:`matplotlib.colors.Colormap`
            * None: 'tab10' colormap is used to sample colors if the number of
              classes is less than or equal to 10, otherwise 'gist_rainbow'
              colormap.
            Single color colormaps will be generated from the colors in the list or
            colors taken from the colormap, and passed to the `cmap` parameter of
            the `plot_method`.
            .. versionadded:: 1.7
        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.
        ylabel : str, default=None
            The label used for the y-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.
        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.
        **kwargs : dict
            Additional keyword arguments to be passed to the
            `plot_method`.
        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores the result.
        See Also
        --------
        DecisionBoundaryDisplay : Decision boundary visualization.
        sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the
            confusion matrix given an estimator, the data, and the label.
        sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the
            confusion matrix given the true and predicted labels.
        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.inspection import DecisionBoundaryDisplay
        >>> iris = load_iris()
        >>> X = iris.data[:, :2]
        >>> classifier = LogisticRegression().fit(X, iris.target)
        >>> disp = DecisionBoundaryDisplay.from_estimator(
        ...     classifier, X, response_method="predict",
        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
        ...     alpha=0.5,
        ... )
        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        check_is_fitted(estimator)
        import matplotlib as mpl
        if not grid_resolution > 1:
            raise ValueError(
                "grid_resolution must be greater than 1. Got"
                f" {grid_resolution} instead."
            )
        if not eps >= 0:
            raise ValueError(
                f"eps must be greater than or equal to 0. Got {eps} instead."
            )
        possible_plot_methods = ("contourf", "contour", "pcolormesh")
        if plot_method not in possible_plot_methods:
            available_methods = ", ".join(possible_plot_methods)
            raise ValueError(
                f"plot_method must be one of {available_methods}. "
                f"Got {plot_method} instead."
            )
        num_features = _num_features(X)
        if num_features != 2:
            raise ValueError(
                f"n_features must be equal to 2. Got {num_features} instead."
            )
        # `multiclass_colors` is only validated when a soft response method
        # may be used and the fitted classifier has more than two classes.
        if (
            response_method in ("predict_proba", "decision_function", "auto")
            and multiclass_colors is not None
            and hasattr(estimator, "classes_")
            and (n_classes := len(estimator.classes_)) > 2
        ):
            if isinstance(multiclass_colors, list):
                if len(multiclass_colors) != n_classes:
                    raise ValueError(
                        "When 'multiclass_colors' is a list, it must be of the same "
                        f"length as 'estimator.classes_' ({n_classes}), got: "
                        f"{len(multiclass_colors)}."
                    )
                elif any(
                    not mpl.colors.is_color_like(col) for col in multiclass_colors
                ):
                    raise ValueError(
                        "When 'multiclass_colors' is a list, it can only contain valid"
                        f" Matplotlib color names. Got: {multiclass_colors}"
                    )
            if isinstance(multiclass_colors, str):
                if multiclass_colors not in mpl.pyplot.colormaps():
                    raise ValueError(
                        "When 'multiclass_colors' is a string, it must be a valid "
                        f"Matplotlib colormap. Got: {multiclass_colors}"
                    )
        # Build the evaluation mesh spanning the data range extended by `eps`.
        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)
        x0_min, x0_max = x0.min() - eps, x0.max() + eps
        x1_min, x1_max = x1.min() - eps, x1.max() + eps
        xx0, xx1 = np.meshgrid(
            np.linspace(x0_min, x0_max, grid_resolution),
            np.linspace(x1_min, x1_max, grid_resolution),
        )
        # Flatten the mesh into an (n_grid_points, 2) design matrix.
        X_grid = np.c_[xx0.ravel(), xx1.ravel()]
        if _is_pandas_df(X) or _is_polars_df(X):
            # Rebuild a container carrying the original column names --
            # presumably so estimators fitted on dataframes see consistent
            # feature names (NOTE(review): confirm against estimator checks).
            adapter = _get_adapter_from_container(X)
            X_grid = adapter.create_container(
                X_grid,
                X_grid,
                columns=X.columns,
            )
        prediction_method = _check_boundary_response_method(
            estimator, response_method, class_of_interest
        )
        try:
            response, _, response_method_used = _get_response_values(
                estimator,
                X_grid,
                response_method=prediction_method,
                pos_label=class_of_interest,
                return_response_method_used=True,
            )
        except ValueError as exc:
            if "is not a valid label" in str(exc):
                # re-raise a more informative error message since `pos_label` is unknown
                # to our user when interacting with
                # `DecisionBoundaryDisplay.from_estimator`
                raise ValueError(
                    f"class_of_interest={class_of_interest} is not a valid label: It "
                    f"should be one of {estimator.classes_}"
                ) from exc
            raise
        # convert classes predictions into integers
        if response_method_used == "predict" and hasattr(estimator, "classes_"):
            encoder = LabelEncoder()
            encoder.classes_ = estimator.classes_
            response = encoder.transform(response)
        if response.ndim == 1:
            response = response.reshape(*xx0.shape)
        else:
            if is_regressor(estimator):
                raise ValueError("Multi-output regressors are not supported")
            if class_of_interest is not None:
                # For the multiclass case, `_get_response_values` returns the response
                # as-is. Thus, we have a column per class and we need to select the
                # column corresponding to the positive class.
                col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
                response = response[:, col_idx].reshape(*xx0.shape)
            else:
                response = response.reshape(*xx0.shape, response.shape[-1])
        # Fall back to dataframe column names for the axis labels when the
        # caller did not provide any.
        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""
        if ylabel is None:
            ylabel = X.columns[1] if hasattr(X, "columns") else ""
        display = cls(
            xx0=xx0,
            xx1=xx1,
            response=response,
            multiclass_colors=multiclass_colors,
            xlabel=xlabel,
            ylabel=ylabel,
        )
        return display.plot(ax=ax, plot_method=plot_method, **kwargs)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,710 @@
import warnings
import numpy as np
import pytest
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
from sklearn.utils.fixes import parse_version
# Shared 2-feature binary classification dataset used by most tests below.
X, y = make_classification(
    n_informative=1,
    n_redundant=1,
    n_clusters_per_class=1,
    n_features=2,
    random_state=42,
)
def load_iris_2d_scaled():
    """Return the first two standardized iris features and the targets."""
    features, target = load_iris(return_X_y=True)
    return scale(features)[:, :2], target
@pytest.fixture(scope="module")
def fitted_clf():
    """Module-scoped logistic regression fitted on the shared (X, y) data."""
    return LogisticRegression().fit(X, y)
def test_input_data_dimension(pyplot):
    """Check that we raise an error when `X` does not have exactly 2 features."""
    # Four features cannot be rendered on a 2-D decision boundary plot.
    data, target = make_classification(n_samples=10, n_features=4, random_state=0)
    estimator = LogisticRegression().fit(data, target)
    expected_msg = "n_features must be equal to 2. Got 4 instead."
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(estimator=estimator, X=data)
def test_check_boundary_response_method_error():
    """Check error raised for multi-output multi-class classifiers by
    `_check_boundary_response_method`.
    """

    # A classifier whose `classes_` entries are themselves arrays emulates a
    # multi-label / multi-output multi-class estimator.
    class MultiLabelClassifier:
        classes_ = [np.array([0, 1]), np.array([0, 1])]

    expected = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=expected):
        _check_boundary_response_method(MultiLabelClassifier(), "predict", None)
@pytest.mark.parametrize(
    "estimator, response_method, class_of_interest, expected_prediction_method",
    [
        (DecisionTreeRegressor(), "predict", None, "predict"),
        (DecisionTreeRegressor(), "auto", None, "predict"),
        (LogisticRegression().fit(*load_iris_2d_scaled()), "predict", None, "predict"),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "predict_proba",
            0,
            "predict_proba",
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "decision_function",
            0,
            "decision_function",
        ),
        (
            LogisticRegression().fit(X, y),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (LogisticRegression().fit(X, y), "predict", None, "predict"),
        (
            LogisticRegression().fit(X, y),
            ["predict_proba", "decision_function"],
            None,
            ["predict_proba", "decision_function"],
        ),
    ],
)
def test_check_boundary_response_method(
    estimator, response_method, class_of_interest, expected_prediction_method
):
    """Check the behaviour of `_check_boundary_response_method` for the supported
    cases.
    """
    # Non-"auto" values must be passed through unchanged; "auto" resolves to
    # "predict" for regressors and to the full preference list for classifiers.
    prediction_method = _check_boundary_response_method(
        estimator, response_method, class_of_interest
    )
    assert prediction_method == expected_prediction_method
def test_multiclass_predict(pyplot):
    """Check multiclass `response=predict` gives expected results."""
    grid_resolution = 10
    eps = 1.0
    X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
    X = X[:, [0, 1]]
    lr = LogisticRegression(random_state=0).fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        lr, X, response_method="predict", grid_resolution=grid_resolution, eps=1.0
    )
    # Rebuild the evaluation mesh with the same resolution and eps so the
    # expected response can be computed independently of the display.
    x0_min, x0_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    x1_min, x1_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx0, xx1 = np.meshgrid(
        np.linspace(x0_min, x0_max, grid_resolution),
        np.linspace(x1_min, x1_max, grid_resolution),
    )
    response = lr.predict(np.c_[xx0.ravel(), xx1.ravel()])
    assert_allclose(disp.response, response.reshape(xx0.shape))
    assert_allclose(disp.xx0, xx0)
    assert_allclose(disp.xx1, xx1)
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"plot_method": "hello_world"},
            r"plot_method must be one of contourf, contour, pcolormesh. Got hello_world"
            r" instead.",
        ),
        (
            {"grid_resolution": 1},
            r"grid_resolution must be greater than 1. Got 1 instead",
        ),
        (
            {"grid_resolution": -1},
            r"grid_resolution must be greater than 1. Got -1 instead",
        ),
        ({"eps": -1.1}, r"eps must be greater than or equal to 0. Got -1.1 instead"),
    ],
)
def test_input_validation_errors(pyplot, kwargs, error_msg, fitted_clf):
    """Check input validation from_estimator."""
    # Each invalid keyword must be rejected by `from_estimator`'s up-front
    # validation, before any response values are computed.
    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(fitted_clf, X, **kwargs)
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"multiclass_colors": {"dict": "not_list"}},
            "'multiclass_colors' must be a list or a str.",
        ),
        ({"multiclass_colors": "not_cmap"}, "it must be a valid Matplotlib colormap"),
        ({"multiclass_colors": ["red", "green"]}, "it must be of the same length"),
        (
            {"multiclass_colors": ["red", "green", "not color"]},
            "it can only contain valid Matplotlib color names",
        ),
    ],
)
def test_input_validation_errors_multiclass_colors(pyplot, kwargs, error_msg):
    """Check input validation for `multiclass_colors` in `from_estimator`."""
    # A 3-class (iris) problem is required: `multiclass_colors` is only
    # validated when the fitted classifier has more than two classes.
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(clf, X, **kwargs)
def test_display_plot_input_error(pyplot, fitted_clf):
    """Check input validation for `plot`."""
    disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, X, grid_resolution=5)
    # `plot` re-validates `plot_method` on its own, independently of the
    # validation performed by `from_estimator`.
    with pytest.raises(ValueError, match="plot_method must be 'contourf'"):
        disp.plot(plot_method="hello_world")
@pytest.mark.parametrize(
    "response_method", ["auto", "predict", "predict_proba", "decision_function"]
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_classifier(
    pyplot, fitted_clf, response_method, plot_method
):
    """Check that decision boundary is correct."""
    fig, ax = pyplot.subplots()
    eps = 2.0
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The evaluation grid must span the data range extended by eps on each side.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
    fig2, ax2 = pyplot.subplots()
    # change plotting method for second plot
    disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == ax2
    assert disp.figure_ == fig2
@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
    pyplot, response_method, plot_method
):
    """Check that decision boundary is correct for outlier detector."""
    fig, ax = pyplot.subplots()
    eps = 2.0
    # IsolationForest ignores y; it is passed here only for API uniformity.
    outlier_detector = IsolationForest(random_state=0).fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        outlier_detector,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
        ax=ax,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The grid must extend `eps` beyond the data range in every direction.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
    """Check that we can display the decision boundary for a regressor."""
    # Keep only the first two features since the display is 2D.
    X, y = load_diabetes(return_X_y=True)
    X = X[:, :2]
    tree = DecisionTreeRegressor().fit(X, y)
    fig, ax = pyplot.subplots()
    eps = 2.0
    disp = DecisionBoundaryDisplay.from_estimator(
        tree,
        X,
        response_method=response_method,
        ax=ax,
        eps=eps,
        plot_method=plot_method,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The grid must extend `eps` beyond the data range in every direction.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
    fig2, ax2 = pyplot.subplots()
    # change plotting method for second plot
    disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == ax2
    assert disp.figure_ == fig2
@pytest.mark.parametrize(
    "response_method, msg",
    [
        (
            "predict_proba",
            "MyClassifier has none of the following attributes: predict_proba",
        ),
        (
            "decision_function",
            "MyClassifier has none of the following attributes: decision_function",
        ),
        (
            "auto",
            (
                "MyClassifier has none of the following attributes: decision_function, "
                "predict_proba, predict"
            ),
        ),
        (
            "bad_method",
            "MyClassifier has none of the following attributes: bad_method",
        ),
    ],
)
def test_error_bad_response(pyplot, response_method, msg):
    """Check errors for bad response."""
    # Minimal classifier that exposes no prediction method at all, so any
    # requested response_method must raise an AttributeError.
    class MyClassifier(ClassifierMixin, BaseEstimator):
        def fit(self, X, y):
            self.fitted_ = True
            self.classes_ = [0, 1]
            return self
    clf = MyClassifier().fit(X, y)
    with pytest.raises(AttributeError, match=msg):
        DecisionBoundaryDisplay.from_estimator(clf, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multilabel_classifier_error(pyplot, response_method):
    """Check that multilabel classifier raises correct error."""
    X, y = make_multilabel_classification(random_state=0)
    # Keep only two features since the display is 2D.
    X = X[:, :2]
    tree = DecisionTreeClassifier().fit(X, y)
    msg = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=msg):
        DecisionBoundaryDisplay.from_estimator(
            tree,
            X,
            response_method=response_method,
        )
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multi_output_multi_class_classifier_error(pyplot, response_method):
    """Check that multi-output multi-class classifier raises correct error."""
    X = np.asarray([[0, 1], [1, 2]])
    # Two string outputs per sample -> multi-output multi-class target.
    y = np.asarray([["tree", "cat"], ["cat", "tree"]])
    tree = DecisionTreeClassifier().fit(X, y)
    msg = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=msg):
        DecisionBoundaryDisplay.from_estimator(
            tree,
            X,
            response_method=response_method,
        )
def test_multioutput_regressor_error(pyplot):
    """Check that multioutput regressor raises correct error."""
    features = np.asarray([[0, 1], [1, 2]])
    # Two continuous outputs per sample -> multi-output regression target.
    targets = np.asarray([[0, 1], [4, 1]])
    regressor = DecisionTreeRegressor().fit(features, targets)
    expected_msg = "Multi-output regressors are not supported"
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(
            regressor, features, response_method="predict"
        )
@pytest.mark.parametrize(
    "response_method",
    ["predict_proba", "decision_function", ["predict_proba", "predict"]],
)
def test_regressor_unsupported_response(pyplot, response_method):
    """Check that requesting a classifier-only response method for a regressor
    raises an informative error."""
    X, y = load_diabetes(return_X_y=True)
    X = X[:, :2]
    tree = DecisionTreeRegressor().fit(X, y)
    err_msg = "should either be a classifier to be used with response_method"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
@pytest.mark.filterwarnings(
    # We expect to raise the following warning because the classifier is fit on a
    # NumPy array
    "ignore:X has feature names, but LogisticRegression was fitted without"
)
def test_dataframe_labels_used(pyplot, fitted_clf):
    """Check that column names are used for pandas."""
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(X, columns=["col_x", "col_y"])
    # pandas column names are used by default
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, df, ax=ax)
    assert ax.get_xlabel() == "col_x"
    assert ax.get_ylabel() == "col_y"
    # second call to plot will have the names
    fig, ax = pyplot.subplots()
    disp.plot(ax=ax)
    assert ax.get_xlabel() == "col_x"
    assert ax.get_ylabel() == "col_y"
    # axes with a label will not get overridden
    fig, ax = pyplot.subplots()
    ax.set(xlabel="hello", ylabel="world")
    disp.plot(ax=ax)
    assert ax.get_xlabel() == "hello"
    assert ax.get_ylabel() == "world"
    # labels get overridden only if provided to the `plot` method
    disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
    assert ax.get_xlabel() == "overwritten_x"
    assert ax.get_ylabel() == "overwritten_y"
    # labels do not get inferred if provided to `from_estimator`
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf, df, ax=ax, xlabel="overwritten_x", ylabel="overwritten_y"
    )
    assert ax.get_xlabel() == "overwritten_x"
    assert ax.get_ylabel() == "overwritten_y"
def test_string_target(pyplot):
    """Check that decision boundary works with classifiers trained on string labels."""
    iris = load_iris()
    features = iris.data[:, [0, 1]]
    # Encode the target as class-name strings instead of integer codes.
    string_labels = iris.target_names[iris.target]
    classifier = LogisticRegression().fit(features, string_labels)
    # Should complete without raising.
    DecisionBoundaryDisplay.from_estimator(
        classifier,
        features,
        grid_resolution=5,
        response_method="predict",
    )
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
    """Check that passing a dataframe at fit and to the Display does not
    raise warnings.
    Non-regression test for:
    * https://github.com/scikit-learn/scikit-learn/issues/23311
    * https://github.com/scikit-learn/scikit-learn/issues/28717
    """
    df = _convert_container(
        X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
    )
    estimator = LogisticRegression().fit(df, y)
    with warnings.catch_warnings():
        # no warnings linked to feature names validation should be raised
        warnings.simplefilter("error", UserWarning)
        DecisionBoundaryDisplay.from_estimator(estimator, df, response_method="predict")
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_binary(pyplot, response_method):
    """Check the behaviour of passing `class_of_interest` for plotting the output of
    `predict_proba` and `decision_function` in the binary case.
    """
    # Restrict iris to the first two classes to get a binary problem.
    iris = load_iris()
    X = iris.data[:100, :2]
    y = iris.target[:100]
    assert_array_equal(np.unique(y), [0, 1])
    estimator = LogisticRegression().fit(X, y)
    # We will check that `class_of_interest=None` is equivalent to
    # `class_of_interest=estimator.classes_[1]`
    disp_default = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=None,
    )
    disp_class_1 = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=estimator.classes_[1],
    )
    assert_allclose(disp_default.response, disp_class_1.response)
    # we can check that `_get_response_values` modifies the response when targeting
    # the other class, i.e. 1 - p(y=1|x) for `predict_proba` and -decision_function
    # for `decision_function`.
    disp_class_0 = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=estimator.classes_[0],
    )
    if response_method == "predict_proba":
        assert_allclose(disp_default.response, 1 - disp_class_0.response)
    else:
        assert response_method == "decision_function"
        assert_allclose(disp_default.response, -disp_class_0.response)
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_multiclass(pyplot, response_method):
    """Check the behaviour of passing `class_of_interest` for plotting the output of
    `predict_proba` and `decision_function` in the multiclass case.
    """
    iris = load_iris()
    X = iris.data[:, :2]
    y = iris.target  # the target are numerical labels
    class_of_interest_idx = 2
    estimator = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=class_of_interest_idx,
    )
    # we will check that we plot the expected values as response
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
    assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # make the same test but this time using target as strings
    y = iris.target_names[iris.target]
    estimator = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=iris.target_names[class_of_interest_idx],
    )
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
    assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # check that we raise an error for unknown labels
    # this test should already be handled in `_get_response_values` but we can have this
    # test here as well
    # NOTE: the estimator is now fitted on string labels, so the integer label
    # `2` is no longer a valid class and must be rejected.
    err_msg = "class_of_interest=2 is not a valid label: It should be one of"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(
            estimator,
            X,
            response_method=response_method,
            class_of_interest=class_of_interest_idx,
        )
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_multiclass_plot_max_class(pyplot, response_method):
    """Check plot correct when plotting max multiclass class."""
    import matplotlib as mpl
    # In matplotlib < v3.5, default value of `pcolormesh(shading)` is 'flat', which
    # results in the last row and column being dropped. Thus older versions produce
    # a 99x99 grid, while newer versions produce a 100x100 grid.
    if parse_version(mpl.__version__) < parse_version("3.5"):
        pytest.skip("`pcolormesh` in Matplotlib >= 3.5 gives smaller grid size.")
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method="pcolormesh",
        response_method=response_method,
    )
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(clf, response_method)(grid).reshape(*disp.response.shape)
    assert_allclose(response, disp.response)
    # One pcolormesh surface is drawn per class.
    assert len(disp.surface_) == len(clf.classes_)
    # Get which class has highest response and check it is plotted
    highest_class = np.argmax(response, axis=2)
    for idx, quadmesh in enumerate(disp.surface_):
        # Note quadmesh mask is True (i.e. masked) when `idx` is NOT the highest class
        assert_array_equal(
            highest_class != idx,
            quadmesh.get_array().mask.reshape(*highest_class.shape),
        )
@pytest.mark.parametrize(
    "multiclass_colors",
    [
        "plasma",
        "Blues",
        ["red", "green", "blue"],
    ],
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour", "pcolormesh"])
def test_multiclass_colors_cmap(pyplot, plot_method, multiclass_colors):
    """Check correct cmap used for all `multiclass_colors` inputs."""
    import matplotlib as mpl
    if parse_version(mpl.__version__) < parse_version("3.5"):
        pytest.skip(
            "Matplotlib >= 3.5 is needed for `==` to check equivalence of colormaps"
        )
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method=plot_method,
        multiclass_colors=multiclass_colors,
    )
    # Reconstruct the per-class base colors the display is expected to use.
    if multiclass_colors == "plasma":
        # "plasma" is a ListedColormap: sampled colors are exposed via `.colors`.
        colors = mpl.pyplot.get_cmap(multiclass_colors, len(clf.classes_)).colors
    elif multiclass_colors == "Blues":
        # "Blues" is a LinearSegmentedColormap: sample it explicitly.
        cmap = mpl.pyplot.get_cmap(multiclass_colors, len(clf.classes_))
        colors = cmap(np.linspace(0, 1, len(clf.classes_)))
    else:
        colors = [mpl.colors.to_rgba(color) for color in multiclass_colors]
    if plot_method != "contour":
        # Filled plots use one white-to-color gradient colormap per class.
        cmaps = [
            mpl.colors.LinearSegmentedColormap.from_list(
                f"colormap_{class_idx}", [(1.0, 1.0, 1.0, 1.0), (r, g, b, 1.0)]
            )
            for class_idx, (r, g, b, _) in enumerate(colors)
        ]
        for idx, quad in enumerate(disp.surface_):
            assert quad.cmap == cmaps[idx]
    else:
        # Contour lines use the flat per-class colors directly.
        assert_allclose(disp.surface_.colors, colors)
def test_cmap_and_colors_logic(pyplot):
    """Check the handling logic for `cmap` and `colors`."""
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    # `multiclass_colors` takes precedence: conflicting `cmap`/`colors`
    # arguments are ignored with a warning rather than raising.
    with pytest.warns(
        UserWarning,
        match="'cmap' is ignored in favor of 'multiclass_colors'",
    ):
        DecisionBoundaryDisplay.from_estimator(
            clf,
            X,
            multiclass_colors="plasma",
            cmap="Blues",
        )
    with pytest.warns(
        UserWarning,
        match="'colors' is ignored in favor of 'multiclass_colors'",
    ):
        DecisionBoundaryDisplay.from_estimator(
            clf,
            X,
            multiclass_colors="plasma",
            colors="blue",
        )
def test_subclass_named_constructors_return_type_is_subclass(pyplot):
    """Check that named constructors return the correct type when subclassed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    classifier = LogisticRegression().fit(X, y)
    # A trivial subclass: `from_estimator` must construct instances of it,
    # not of the parent display class.
    class SubclassOfDisplay(DecisionBoundaryDisplay):
        pass
    display = SubclassOfDisplay.from_estimator(estimator=classifier, X=X)
    assert isinstance(display, SubclassOfDisplay)

View File

@@ -0,0 +1,47 @@
import numpy as np
import pytest
from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize(
    "feature_names, array_type, expected_feature_names",
    [
        (None, "array", ["x0", "x1", "x2"]),
        (None, "dataframe", ["a", "b", "c"]),
        (np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
    ],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
    """`_check_feature_names` uses explicit names, dataframe columns, or
    generated "x<i>" names, in that order of preference."""
    data = _convert_container(
        np.random.randn(10, 3),
        constructor_name=array_type,
        columns_name=["a", "b", "c"],
    )
    assert _check_feature_names(data, feature_names) == expected_feature_names
def test_check_feature_names_error():
    """Duplicate feature names must be rejected with a ValueError."""
    data = np.random.randn(10, 3)
    duplicated_names = ["a", "b", "c", "a"]
    expected_msg = "feature_names should not contain duplicates."
    with pytest.raises(ValueError, match=expected_msg):
        _check_feature_names(data, duplicated_names)
@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
    """`_get_feature_index` resolves positional and named features alike."""
    names = ["a", "b", "c"]
    assert _get_feature_index(fx, names) == idx
@pytest.mark.parametrize(
    "fx, feature_names, err_msg",
    [
        ("a", None, "Cannot plot partial dependence for feature 'a'"),
        ("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
    ],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
    """Check the error paths of `_get_feature_index` for string features."""
    with pytest.raises(ValueError, match=err_msg):
        _get_feature_index(fx, feature_names)

View File

@@ -0,0 +1,540 @@
import numpy as np
import pytest
from joblib import parallel_backend
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_regression,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
get_scorer,
mean_squared_error,
r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
    n_jobs, max_samples, sample_weight
):
    # Make sure that feature highly correlated to the target have a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5
    X, y = load_diabetes(return_X_y=True)
    # Append an almost-perfect copy of the target as an extra feature.
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
    X = np.hstack([X, y_with_little_noise])
    # "ones" exercises the explicit-but-neutral sample_weight code path.
    weights = np.ones_like(y) if sample_weight == "ones" else sample_weight
    clf = RandomForestRegressor(n_estimators=10, random_state=42)
    clf.fit(X, y)
    result = permutation_importance(
        clf,
        X,
        y,
        sample_weight=weights,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
    n_jobs, max_samples
):
    pd = pytest.importorskip("pandas")
    # Make sure that feature highly correlated to the target have a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5
    dataset = load_iris()
    X, y = dataset.data, dataset.target
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
    # Adds feature correlated with y as the last column
    X = pd.DataFrame(X, columns=dataset.feature_names)
    X["correlated_feature"] = y_with_little_noise
    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X, y)
    result = permutation_importance(
        clf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
    # Permutation variable importance should not be affected by the high
    # cardinality bias of traditional feature importances, especially when
    # computed on a held-out test set:
    rng = np.random.RandomState(seed)
    n_repeats = 5
    n_samples = 1000
    n_classes = 5
    n_informative_features = 2
    n_noise_features = 1
    n_features = n_informative_features + n_noise_features
    # Generate a multiclass classification dataset and a set of informative
    # binary features that can be used to predict some classes of y exactly
    # while leaving some classes unexplained to make the problem harder.
    classes = np.arange(n_classes)
    y = rng.choice(classes, size=n_samples)
    X = np.hstack([(y == c).reshape(-1, 1) for c in classes[:n_informative_features]])
    X = X.astype(np.float32)
    # Not all target classes are explained by the binary class indicator
    # features:
    assert n_informative_features < n_classes
    # Add `n_noise_features` noisy features with high cardinality (numerical)
    # values that can be used to overfit the training data.
    X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
    assert X.shape == (n_samples, n_features)
    # Split the dataset to be able to evaluate on a held-out test set. The
    # Test size should be large enough for importance measurements to be
    # stable:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=rng
    )
    clf = RandomForestClassifier(n_estimators=5, random_state=rng)
    clf.fit(X_train, y_train)
    # Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give misleading
    # impression that high cardinality noisy variables are the most important:
    tree_importances = clf.feature_importances_
    informative_tree_importances = tree_importances[:n_informative_features]
    noisy_tree_importances = tree_importances[n_informative_features:]
    assert informative_tree_importances.max() < noisy_tree_importances.min()
    # Let's check that permutation-based feature importances do not have this
    # problem.
    r = permutation_importance(
        clf,
        X_test,
        y_test,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert r.importances.shape == (X.shape[1], n_repeats)
    # Split the importances between informative and noisy features
    informative_importances = r.importances_mean[:n_informative_features]
    noisy_importances = r.importances_mean[n_informative_features:]
    # Because we do not have a binary variable explaining each target classes,
    # the RF model will have to use the random variable to make some
    # (overfitting) splits (as max_depth is not set). Therefore the noisy
    # variables will be non-zero but with small values oscillating around
    # zero:
    assert max(np.abs(noisy_importances)) > 1e-7
    assert noisy_importances.max() < 0.05
    # The binary features correlated with y should have a higher importance
    # than the high cardinality noisy features.
    # The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately a bit more than 0.2 of accuracy.
    assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
    """Check importances on data mixing NaN and numeric values, and that
    results depend on the random state."""
    rng = np.random.RandomState(42)
    n_repeats = 4
    # Last column is correlated with y
    X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
    y = np.array([0, 1, 0, 1])
    clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
    # use another random state
    rng = np.random.RandomState(0)
    result2 = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result2.importances.shape == (X.shape[1], n_repeats)
    # Different seeds yield different permutations, hence different raw values.
    assert not np.allclose(result.importances, result2.importances)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
    """Check importances on a dataframe mixing numeric and categorical columns."""
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(42)
    n_repeats = 5
    # Last column is correlated with y
    X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
    y = np.array([0, 1, 0, 1])
    num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
    preprocess = ColumnTransformer(
        [("num", num_preprocess, ["col1"]), ("cat", OneHotEncoder(), ["col2"])]
    )
    clf = make_pipeline(preprocess, LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regresssion():
    """Compare permutation importances of a linear model on standardized data
    against their closed-form expectation (2 * coef**2 under the MSE score)."""
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    X = scale(X)
    y = scale(y)
    lr = LinearRegression().fit(X, y)
    # this relationship can be computed in closed form
    expected_importances = 2 * lr.coef_**2
    results = permutation_importance(
        lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
    )
    assert_allclose(
        expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
    )
@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    # Also tests that max_samples equal to number of samples is equivalent to 1.0
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(X, y)
    importance_sequential = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=1, max_samples=max_samples
    )
    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_sequential["importances"].min()
    imp_max = importance_sequential["importances"].max()
    assert imp_max - imp_min > 0.3
    # Then actually check that parallelism does not impact the results
    # either with shared memory (threading) or without isolated memory
    # via process-based parallelism using the default backend
    # ('loky' or 'multiprocessing') depending on the joblib version:
    # process-based parallelism (by default):
    importance_processes = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=2
    )
    assert_allclose(
        importance_processes["importances"], importance_sequential["importances"]
    )
    # thread-based parallelism:
    with parallel_backend("threading"):
        importance_threading = permutation_importance(
            lr, X, y, n_repeats=5, random_state=0, n_jobs=2
        )
    assert_allclose(
        importance_threading["importances"], importance_sequential["importances"]
    )
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    # This test checks that the column shuffling logic has the same behavior
    # both a dataframe and a simple numpy array.
    pd = pytest.importorskip("pandas")
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    X, y = make_regression(n_samples=100, n_features=5, random_state=0)
    X_df = pd.DataFrame(X)
    # Add a categorical feature that is statistically linked to y:
    binner = KBinsDiscretizer(
        n_bins=3,
        encode="ordinal",
        quantile_method="averaged_inverted_cdf",
    )
    cat_column = binner.fit_transform(y.reshape(-1, 1))
    # Concatenate the extra column to the numpy array: integers will be
    # cast to float values
    X = np.hstack([X, cat_column])
    assert X.dtype.kind == "f"
    # Insert extra column as a non-numpy-native dtype:
    cat_column = pd.Categorical(cat_column.ravel())
    new_col_idx = len(X_df.columns)
    X_df[new_col_idx] = cat_column
    assert X_df[new_col_idx].dtype == cat_column.dtype
    # Stitch an arbitrary index to the dataframe:
    X_df.index = np.arange(len(X_df)).astype(str)
    rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
    rf.fit(X, y)
    n_repeats = 3
    importance_array = permutation_importance(
        rf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_array["importances"].min()
    imp_max = importance_array["importances"].max()
    assert imp_max - imp_min > 0.3
    # Now check that importances computed on dataframe match the values
    # of those computed on the array with the same data.
    importance_dataframe = permutation_importance(
        rf,
        X_df,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert_allclose(
        importance_array["importances"], importance_dataframe["importances"]
    )
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
    # Smoke, non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/15810
    n_samples, n_features = int(5e4), 4
    X, y = make_classification(
        n_samples=n_samples, n_features=n_features, random_state=0
    )
    assert X.nbytes > 1e6  # trigger joblib memmapping
    X = _convert_container(X, input_type)
    clf = DummyClassifier(strategy="prior").fit(X, y)
    # Actual smoke test: should not raise any error:
    n_repeats = 5
    r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
    # Auxiliary check: DummyClassifier is feature independent:
    # permuting features should not change the predictions
    expected_importances = np.zeros((n_features, n_repeats))
    assert_allclose(expected_importances, r.importances)
def test_permutation_importance_sample_weight():
    # Creating data with 2 features and 1000 samples, where the target
    # variable is a linear combination of the two features, such that
    # in half of the samples the impact of feature 1 is twice the impact of
    # feature 2, and vice versa on the other half of the samples.
    rng = np.random.RandomState(1)
    n_samples = 1000
    n_features = 2
    n_half_samples = n_samples // 2
    x = rng.normal(0.0, 0.001, (n_samples, n_features))
    y = np.zeros(n_samples)
    y[:n_half_samples] = 2 * x[:n_half_samples, 0] + x[:n_half_samples, 1]
    y[n_half_samples:] = x[n_half_samples:, 0] + 2 * x[n_half_samples:, 1]
    # Fitting linear regression with perfect prediction
    lr = LinearRegression(fit_intercept=False)
    lr.fit(x, y)
    # When all samples are weighted with the same weights, the ratio of
    # the two features importance should equal to 1 on expectation (when using
    # mean absolute error as the loss function).
    pi = permutation_importance(
        lr, x, y, random_state=1, scoring="neg_mean_absolute_error", n_repeats=200
    )
    x1_x2_imp_ratio_w_none = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)
    # When passing a vector of ones as the sample_weight, results should be
    # the same as in the case that sample_weight=None.
    w = np.ones(n_samples)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w_ones = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)
    # When the ratio between the weights of the first half of the samples and
    # the second half of the samples approaches to infinity, the ratio of
    # the two features importance should equal to 2 on expectation (when using
    # mean absolute error as the loss function).
    w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)])
    lr.fit(x, y, w)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)
def test_permutation_importance_no_weights_scoring_function():
    """Check `permutation_importance` with a scorer lacking `sample_weight`.

    When ``sample_weight`` is None, such a scorer must be accepted without
    error; when a ``sample_weight`` is provided, forwarding it to the scorer
    must raise a ``TypeError``.
    """

    # Scorer deliberately missing the ``sample_weight`` keyword.
    def my_scorer(estimator, X, y):
        return 1

    # Minimal fitted estimator and data for the permutation test.
    x = np.array([[1, 2], [3, 4]])
    y = np.array([1, 2])
    w = np.array([1, 1])
    lr = LinearRegression()
    lr.fit(x, y)

    # sample_weight=None: the scorer is never handed weights, so no error.
    try:
        permutation_importance(lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1)
    except TypeError:
        # Fixed message: the function under test is permutation_importance,
        # not "permutation_test".
        pytest.fail(
            "permutation_importance raised an error when using a scorer "
            "function that does not accept sample_weight even though "
            "sample_weight was None"
        )

    # sample_weight is not None: forwarding it to the scorer must fail.
    with pytest.raises(TypeError):
        permutation_importance(
            lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1, sample_weight=w
        )
@pytest.mark.parametrize(
    "list_single_scorer, multi_scorer",
    [
        (["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
        (
            ["r2", "neg_mean_squared_error"],
            {
                "r2": get_scorer("r2"),
                "neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
            },
        ),
        (
            ["r2", "neg_mean_squared_error"],
            lambda estimator, X, y: {
                "r2": r2_score(y, estimator.predict(X)),
                "neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
            },
        ),
    ],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
    """Multi-metric scoring must match the equivalent single-metric runs."""
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    estimator = LinearRegression().fit(X, y)

    combined = permutation_importance(
        estimator, X, y, random_state=1, scoring=multi_scorer, n_repeats=2
    )
    # One result Bunch per requested metric, keyed by scorer name.
    assert set(combined.keys()) == set(list_single_scorer)

    # Each per-metric result must be identical to a single-scorer run with
    # the same random_state.
    for metric_name in list_single_scorer:
        reference = permutation_importance(
            estimator, X, y, random_state=1, scoring=metric_name, n_repeats=2
        )
        assert_allclose(combined[metric_name].importances, reference.importances)
def test_permutation_importance_max_samples_error():
    """Check that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    """
    X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
    y = np.array([0, 1, 0, 1])
    estimator = LogisticRegression().fit(X, y)

    # Requesting more permuted samples (5) than rows in X (4) is invalid.
    expected_message = r"max_samples must be <= n_samples"
    with pytest.raises(ValueError, match=expected_message):
        permutation_importance(estimator, X, y, max_samples=5)