add read me
This commit is contained in:
@@ -0,0 +1,16 @@
|
||||
"""Tools for model inspection."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from ._partial_dependence import partial_dependence
|
||||
from ._permutation_importance import permutation_importance
|
||||
from ._plot.decision_boundary import DecisionBoundaryDisplay
|
||||
from ._plot.partial_dependence import PartialDependenceDisplay
|
||||
|
||||
__all__ = [
|
||||
"DecisionBoundaryDisplay",
|
||||
"PartialDependenceDisplay",
|
||||
"partial_dependence",
|
||||
"permutation_importance",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,775 @@
|
||||
"""Partial dependence plots for regression and classification models."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import warnings
|
||||
from collections.abc import Iterable
|
||||
|
||||
import numpy as np
|
||||
from scipy import sparse
|
||||
from scipy.stats.mstats import mquantiles
|
||||
|
||||
from ..base import is_classifier, is_regressor
|
||||
from ..ensemble import RandomForestRegressor
|
||||
from ..ensemble._gb import BaseGradientBoosting
|
||||
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
|
||||
BaseHistGradientBoosting,
|
||||
)
|
||||
from ..tree import DecisionTreeRegressor
|
||||
from ..utils import Bunch, _safe_indexing, check_array
|
||||
from ..utils._indexing import _determine_key_type, _get_column_indices, _safe_assign
|
||||
from ..utils._optional_dependencies import check_matplotlib_support # noqa: F401
|
||||
from ..utils._param_validation import (
|
||||
HasMethods,
|
||||
Integral,
|
||||
Interval,
|
||||
StrOptions,
|
||||
validate_params,
|
||||
)
|
||||
from ..utils._response import _get_response_values
|
||||
from ..utils.extmath import cartesian
|
||||
from ..utils.validation import _check_sample_weight, check_is_fitted
|
||||
from ._pd_utils import _check_feature_names, _get_feature_index
|
||||
|
||||
__all__ = [
|
||||
"partial_dependence",
|
||||
]
|
||||
|
||||
|
||||
def _grid_from_X(X, percentiles, is_categorical, grid_resolution, custom_values):
    """Build the evaluation grid for the target features held in ``X``.

    One axis of values is built per column of ``X``; the returned grid is the
    cartesian product of those axes. For column ``j`` the axis is, in order of
    precedence:

    - ``custom_values[j]`` when ``j`` is a key of ``custom_values``;
    - the sorted unique values of the column when the feature is flagged as
      categorical in ``is_categorical`` or when it has fewer than
      ``grid_resolution`` unique values;
    - otherwise ``grid_resolution`` equally-spaced points between the two
      empirical quantiles of the column given by ``percentiles``.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_target_features)
        The data.

    percentiles : tuple of float
        Lower and upper quantile levels bounding the grid of a numerical
        feature. Both must be in [0, 1] and the first must be strictly
        smaller than the second.

    is_categorical : list of bool
        For each column of ``X``, whether it is categorical. Categorical
        columns use their unique values (i.e. categories) as axis.

    grid_resolution : int
        Number of equally spaced points per numerical feature. Must be
        strictly greater than 1.

    custom_values : dict
        Mapping from column index of ``X`` to an array-like of values at which
        the partial dependence should be evaluated for that column. Each entry
        must be one-dimensional.

    Returns
    -------
    grid : ndarray of shape (n_points, n_target_features)
        A value for each feature at each point in the grid. When no custom
        values are given, ``n_points <= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The axes from which the grid was built. ``values[j]`` has length
        ``len(custom_values[j])`` if ``j`` is in ``custom_values``; otherwise
        its length is either ``grid_resolution`` or the number of unique
        values in ``X[:, j]``.

    Raises
    ------
    ValueError
        If ``percentiles`` or ``grid_resolution`` are invalid, if a custom axis
        is not one-dimensional, if the unique values of a column cannot be
        sorted, or if both empirical quantiles of a column coincide.
    """
    has_two_bounds = isinstance(percentiles, Iterable) and len(percentiles) == 2
    if not has_two_bounds:
        raise ValueError("'percentiles' must be a sequence of 2 elements.")
    if any(not (0 <= level <= 1) for level in percentiles):
        raise ValueError("'percentiles' values must be in [0, 1].")
    if percentiles[0] >= percentiles[1]:
        raise ValueError("percentiles[0] must be strictly less than percentiles[1].")

    if grid_resolution <= 1:
        raise ValueError("'grid_resolution' must be strictly greater than 1.")

    # Normalise every custom axis to an ndarray. As soon as one entry is a
    # string the object dtype is forced so that string arrays are always
    # object arrays.
    converted = {}
    for key, raw_axis in custom_values.items():
        holds_str = any(isinstance(item, str) for item in raw_axis)
        converted[key] = np.asarray(raw_axis, dtype=object if holds_str else None)
    custom_values = converted

    wrong_dims = [
        f"Feature {key}: {arr.ndim} dimensions"
        for key, arr in custom_values.items()
        if arr.ndim != 1
    ]
    if wrong_dims:
        error_string = ", ".join(wrong_dims)
        raise ValueError(
            "The custom grid for some features is not a one-dimensional array. "
            f"{error_string}"
        )

    values = []
    # TODO: we should handle missing values (i.e. `np.nan`) specifically and store them
    # in a different Bunch attribute.
    for feature, is_cat in enumerate(is_categorical):
        if feature in custom_values:
            # User-supplied axis takes precedence over anything derived from X.
            values.append(custom_values[feature])
            continue

        try:
            uniques = np.unique(_safe_indexing(X, feature, axis=1))
        except TypeError as exc:
            # `np.unique` will fail in the presence of `np.nan` and `str` categories
            # due to sorting. Temporary, we reraise an error explaining the problem.
            raise ValueError(
                f"The column #{feature} contains mixed data types. Finding unique "
                "categories fail due to sorting. It usually means that the column "
                "contains `np.nan` values together with `str` categories. Such use "
                "case is not yet supported in scikit-learn."
            ) from exc

        if is_cat or uniques.shape[0] < grid_resolution:
            # Categorical feature, or too few distinct values to fill the
            # requested resolution: the unique values are the axis.
            values.append(uniques)
            continue

        # Numerical feature: evenly spaced points between the two quantiles.
        emp_percentiles = mquantiles(
            _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
        )
        lower_bound, upper_bound = emp_percentiles[0], emp_percentiles[1]
        if np.allclose(lower_bound, upper_bound):
            raise ValueError(
                "percentiles are too close to each other, "
                "unable to build the grid. Please choose percentiles "
                "that are further apart."
            )
        values.append(
            np.linspace(lower_bound, upper_bound, num=grid_resolution, endpoint=True)
        )

    return cartesian(values), values
|
||||
|
||||
|
||||
def _partial_dependence_recursion(est, grid, features):
    """Compute averaged partial dependence with the estimator's recursion method.

    The actual computation is delegated to
    ``est._compute_partial_dependence_recursion``; this wrapper only
    normalises the shape of the result.

    The recursion method is in particular enabled for tree-based estimators.
    For each `grid` value, a weighted tree traversal is performed: if a split
    node involves an input feature of interest, the corresponding left or
    right branch is followed; otherwise both branches are followed, each
    branch being weighted by the fraction of training samples that entered
    that branch. The partial dependence is the weighted average of all the
    visited leaves values.

    This is faster than the `'brute'` method
    (:func:`~sklearn.inspection._partial_dependence._partial_dependence_brute`),
    but the average is implicitly taken over the `X` used to train `est`.

    Parameters
    ----------
    est : BaseEstimator
        A fitted estimator exposing `_compute_partial_dependence_recursion`.
        Multioutput-multiclass classifiers are not supported. `'recursion'`
        is only supported for some tree-based estimators (namely
        :class:`~sklearn.ensemble.GradientBoostingClassifier`,
        :class:`~sklearn.ensemble.GradientBoostingRegressor`,
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
        :class:`~sklearn.tree.DecisionTreeRegressor`,
        :class:`~sklearn.ensemble.RandomForestRegressor`).

    grid : array-like of shape (n_points, n_target_features)
        The grid of feature values for which the partial dependence is
        calculated. `n_points` is the number of points in the grid and
        `n_target_features` the number of features of interest.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    Returns
    -------
    averaged_predictions : array-like of shape (n_targets, n_points)
        The averaged predictions for the given `grid` of features values.
        `n_targets` is 1 for binary classification, `n_tasks` for multi-output
        regression and `n_classes` for multiclass classification.
    """
    result = est._compute_partial_dependence_recursion(grid, features)
    # A flat result is promoted to a single row, (1, n_points), so that the
    # output layout matches the one of _partial_dependence_brute.
    return result.reshape(1, -1) if result.ndim == 1 else result
|
||||
|
||||
|
||||
def _partial_dependence_brute(
    est, grid, features, X, response_method, sample_weight=None
):
    """Calculate partial dependence via the brute force method.

    The brute method explicitly averages the predictions of an estimator over a
    grid of feature values.

    For each `grid` value, all the samples from `X` have their variables of
    interest replaced by that specific `grid` value. The predictions are then made
    and averaged across the samples.

    This method is slower than the `'recursion'`
    (:func:`~sklearn.inspection._partial_dependence._partial_dependence_recursion`)
    version for estimators with this second option. However, with the `'brute'`
    force method, the average will be done with the given `X` and not the `X`
    used during training, as it is done in the `'recursion'` version. Therefore
    the average can always accept `sample_weight` (even when the estimator was
    fitted without).

    Parameters
    ----------
    est : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    grid : array-like of shape (n_points, n_target_features)
        The grid of feature values for which the partial dependence is calculated.
        Note that `n_points` is the number of points in the grid and `n_target_features`
        is the number of features you are doing partial dependence at.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    X : array-like of shape (n_samples, n_features)
        `X` is used to generate values for the complement features. That is, for
        each value in `grid`, the method will average the prediction of each
        sample from `X` having that grid value for `features`. `X` itself is not
        modified: the grid values are written into a copy.

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. Note that
        `sample_weight` does not change the individual predictions.

    Returns
    -------
    averaged_predictions : array-like of shape (n_targets, n_points)
        The averaged predictions for the given `grid` of features values.
        Note that `n_targets` is the number of targets (e.g. 1 for binary
        classification, `n_tasks` for multi-output regression, and `n_classes` for
        multiclass classification) and `n_points` is the number of points in the `grid`.

    predictions : array-like
        The predictions for the given `grid` of features values over the samples
        from `X`. For non-multioutput regression and binary classification the
        shape is `(n_instances, n_points)` and for multi-output regression and
        multiclass classification the shape is `(n_targets, n_instances, n_points)`,
        where `n_targets` is the number of targets (`n_tasks` for multi-output
        regression, and `n_classes` for multiclass classification), `n_instances`
        is the number of instances in `X`, and `n_points` is the number of points
        in the `grid`.
    """
    predictions = []
    averaged_predictions = []

    if response_method == "auto":
        response_method = (
            "predict" if is_regressor(est) else ["predict_proba", "decision_function"]
        )

    # Work on a copy so that the caller's X is never overwritten.
    X_eval = X.copy()
    for new_values in grid:
        for i, variable in enumerate(features):
            _safe_assign(X_eval, new_values[i], column_indexer=variable)

        # Note: `pred` holds one prediction per row of X_eval. Its shape is
        # expected to be (assuming the usual `_get_response_values` contract):
        # (n_samples,) for non-multioutput regressors
        # (n_samples, n_tasks) for multioutput regressors
        # (n_samples, 1) for the regressors in cross_decomposition (I think)
        # (n_samples,) for binary classification (positive class already selected)
        # (n_samples, n_classes) for multiclass classification
        pred, _ = _get_response_values(est, X_eval, response_method=response_method)

        predictions.append(pred)
        # average over samples
        averaged_predictions.append(np.average(pred, axis=0, weights=sample_weight))

    n_samples = X.shape[0]

    # reshape to (n_targets, n_instances, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    predictions = np.array(predictions).T
    if is_regressor(est) and predictions.ndim == 2:
        # non-multioutput regression, shape is (n_instances, n_points,)
        predictions = predictions.reshape(n_samples, -1)
    elif is_classifier(est) and predictions.ndim == 3 and predictions.shape[0] == 2:
        # Binary classification stacked as (2, n_instances, n_points):
        # we output the effect of the **positive** class.
        # The `ndim == 3` guard matters: when the positive class has already
        # been selected, `predictions` is 2-D with shape
        # (n_instances, n_points), and without the guard a dataset of exactly
        # two samples would be mistaken for the two-class layout and have one
        # of its samples dropped.
        predictions = predictions[1]
        predictions = predictions.reshape(n_samples, -1)

    # reshape averaged_predictions to (n_targets, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    averaged_predictions = np.array(averaged_predictions).T
    if averaged_predictions.ndim == 1:
        # reshape to (1, n_points) for consistency with
        # _partial_dependence_recursion
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions, predictions
|
||||
|
||||
|
||||
@validate_params(
    {
        "estimator": [
            HasMethods(["fit", "predict"]),
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
        ],
        "X": ["array-like", "sparse matrix"],
        "features": ["array-like", Integral, str],
        "sample_weight": ["array-like", None],
        "categorical_features": ["array-like", None],
        "feature_names": ["array-like", None],
        "response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
        "percentiles": [tuple],
        "grid_resolution": [Interval(Integral, 1, None, closed="left")],
        "method": [StrOptions({"auto", "recursion", "brute"})],
        "kind": [StrOptions({"average", "individual", "both"})],
        "custom_values": [dict, None],
    },
    prefer_skip_nested_validation=True,
)
def partial_dependence(
    estimator,
    X,
    features,
    *,
    sample_weight=None,
    categorical_features=None,
    feature_names=None,
    response_method="auto",
    percentiles=(0.05, 0.95),
    grid_resolution=100,
    custom_values=None,
    method="auto",
    kind="average",
):
    """Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in
    :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
    and the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        `'recursion'` method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as `'brute'` up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for `'recursion'` because the
        offset will be sample-dependent. It is preferable to use the `'brute'`
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like, sparse matrix or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str, bool} or int or str
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. If
        `sample_weight` is not `None`, then `method` will be set to `'brute'`.
        Note that `sample_weight` is ignored for `kind='individual'`.

        .. versionadded:: 1.3

    categorical_features : array-like of shape (n_features,) or shape \
            (n_categorical_features,), dtype={bool, int, str}, default=None
        Indicates the categorical features.

        - `None`: no feature will be considered categorical;
        - boolean array-like: boolean mask of shape `(n_features,)`
          indicating which features are categorical. Thus, this array has
          the same length as `X.shape[1]`;
        - integer or string array-like: integer indices or strings
          indicating categorical features.

        .. versionadded:: 1.2

    feature_names : array-like of shape (n_features,), dtype=str, default=None
        Name of each feature; `feature_names[i]` holds the name of the feature
        with index `i`.
        By default, the name of the feature corresponds to their numerical
        index for NumPy array and their column name for pandas dataframe.

        .. versionadded:: 1.2

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].
        This parameter is overridden by `custom_values` if that parameter is set.

    grid_resolution : int, default=100
        The number of equally spaced points on the grid, for each target
        feature.
        This parameter is overridden by `custom_values` if that parameter is set.

    custom_values : dict, default=None
        A dictionary mapping an entry of `features` (as passed, i.e. the
        column index or column name) to an array of values where the partial
        dependence should be calculated for that feature. Setting a range of
        values for a feature overrides `grid_resolution` and `percentiles`.

        See :ref:`how to use partial_dependence
        <plt_partial_dependence_custom_values>` for an example of how this parameter can
        be used.

        .. versionadded:: 1.7

    method : {'auto', 'recursion', 'brute'}, default='auto'
        The method used to calculate the averaged predictions:

        - `'recursion'` is only supported for some tree-based estimators
          (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`,
          ) when `kind='average'`.
          This is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities. Since the `'recursion'` method implicitly computes
          the average of the Individual Conditional Expectation (ICE) by
          design, it is not compatible with ICE and thus `kind` must be
          `'average'`.

        - `'brute'` is supported for any estimator, but is more
          computationally intensive.

        - `'auto'`: the `'recursion'` is used for estimators that support it,
          and `'brute'` is used otherwise. If `sample_weight` is not `None`,
          then `'brute'` is used regardless of the estimator.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the `'brute'` and `'recursion'` method.

    kind : {'average', 'individual', 'both'}, default='average'
        Whether to return the partial dependence averaged across all the
        samples in the dataset or one value per sample or both.
        See Returns below.

        Note that the fast `method='recursion'` option is only available for
        `kind='average'` and `sample_weight=None`. Computing individual
        dependencies and doing weighted averages requires using the slower
        `method='brute'`.

        .. versionadded:: 0.24

    Returns
    -------
    predictions : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        individual : ndarray of shape (n_outputs, n_instances, \
                len(values[0]), len(values[1]), ...)
            The predictions for all the points in the grid for all
            samples in X. This is also known as Individual
            Conditional Expectation (ICE).
            Only available when `kind='individual'` or `kind='both'`.

        average : ndarray of shape (n_outputs, len(values[0]), \
                len(values[1]), ...)
            The predictions for all the points in the grid, averaged
            over all samples in X (or over the training data if
            `method` is 'recursion').
            Only available when `kind='average'` or `kind='both'`.

        grid_values : seq of 1d ndarrays
            The values with which the grid has been created. The generated
            grid is a cartesian product of the arrays in `grid_values` where
            `len(grid_values) == len(features)`. The size of each array
            `grid_values[j]` is either `grid_resolution`, or the number of
            unique values in `X[:, j]`, whichever is smaller, unless custom
            values were given for that feature, in which case it is the
            number of custom values.

            .. versionadded:: 1.3

        `n_outputs` corresponds to the number of classes in a multi-class
        setting, or to the number of tasks for multi-output regression.
        For classical regression and binary classification `n_outputs==1`.
        `n_values_feature_j` corresponds to the size `grid_values[j]`.

    See Also
    --------
    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
    PartialDependenceDisplay : Partial Dependence visualization.

    Examples
    --------
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2) # doctest: +SKIP
    (array([[-4.52,  4.52]]), [array([ 0.,  1.])])
    """
    check_is_fitted(estimator)

    if not (is_classifier(estimator) or is_regressor(estimator)):
        raise ValueError("'estimator' must be a fitted regressor or classifier.")

    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
        raise ValueError("Multiclass-multioutput estimators are not supported")

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert DataFrame into a NumPy array.
    if not (hasattr(X, "__array__") or sparse.issparse(X)):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=object)

    if is_regressor(estimator) and response_method != "auto":
        raise ValueError(
            "The response_method parameter is ignored for regressors and "
            "must be 'auto'."
        )

    if kind != "average":
        # Individual predictions are only produced by the brute method.
        if method == "recursion":
            raise ValueError(
                "The 'recursion' method only applies when 'kind' is set to 'average'"
            )
        method = "brute"

    if method == "recursion" and sample_weight is not None:
        raise ValueError(
            "The 'recursion' method can only be applied when sample_weight is None."
        )

    if method == "auto":
        if sample_weight is not None:
            method = "brute"
        elif isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
            method = "recursion"
        elif isinstance(
            estimator,
            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
        ):
            method = "recursion"
        else:
            method = "brute"

    if method == "recursion":
        if not isinstance(
            estimator,
            (
                BaseGradientBoosting,
                BaseHistGradientBoosting,
                DecisionTreeRegressor,
                RandomForestRegressor,
            ),
        ):
            # Each supported estimator is listed exactly once in the message.
            supported_classes_recursion = (
                "GradientBoostingClassifier",
                "GradientBoostingRegressor",
                "HistGradientBoostingClassifier",
                "HistGradientBoostingRegressor",
                "DecisionTreeRegressor",
                "RandomForestRegressor",
            )
            raise ValueError(
                "Only the following estimators support the 'recursion' "
                "method: {}. Try using method='brute'.".format(
                    ", ".join(supported_classes_recursion)
                )
            )
        if response_method == "auto":
            response_method = "decision_function"

        if response_method != "decision_function":
            raise ValueError(
                "With the 'recursion' method, the response_method must be "
                "'decision_function'. Got {}.".format(response_method)
            )

    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    if _determine_key_type(features, accept_slice=False) == "int":
        # _get_column_indices() supports negative indexing. Here, we limit
        # the indexing to be positive. The upper bound will be checked
        # by _get_column_indices()
        if np.any(np.less(features, 0)):
            raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))

    features_indices = np.asarray(
        _get_column_indices(X, features), dtype=np.intp, order="C"
    ).ravel()

    feature_names = _check_feature_names(X, feature_names)

    n_features = X.shape[1]
    if categorical_features is None:
        is_categorical = [False] * len(features_indices)
    else:
        categorical_features = np.asarray(categorical_features)
        if categorical_features.size == 0:
            raise ValueError(
                "Passing an empty list (`[]`) to `categorical_features` is not "
                "supported. Use `None` instead to indicate that there are no "
                "categorical features."
            )
        if categorical_features.dtype.kind == "b":
            # categorical features provided as a list of boolean
            if categorical_features.size != n_features:
                raise ValueError(
                    "When `categorical_features` is a boolean array-like, "
                    "the array should be of shape (n_features,). Got "
                    f"{categorical_features.size} elements while `X` contains "
                    f"{n_features} features."
                )
            is_categorical = [categorical_features[idx] for idx in features_indices]
        elif categorical_features.dtype.kind in ("i", "O", "U"):
            # categorical features provided as a list of indices or feature names
            categorical_features_idx = [
                _get_feature_index(cat, feature_names=feature_names)
                for cat in categorical_features
            ]
            is_categorical = [
                idx in categorical_features_idx for idx in features_indices
            ]
        else:
            raise ValueError(
                "Expected `categorical_features` to be an array-like of boolean,"
                f" integer, or string. Got {categorical_features.dtype} instead."
            )

    custom_values = custom_values or {}
    # Wrap a scalar feature so that it can be iterated alongside
    # `features_indices`. `Integral` (rather than `int`) also covers NumPy
    # integer scalars, which the parameter validation accepts.
    if isinstance(features, (str, Integral)):
        features = [features]

    for feature_idx, feature, is_cat in zip(features_indices, features, is_categorical):
        if is_cat:
            continue

        if _safe_indexing(X, feature_idx, axis=1).dtype.kind in "iu":
            # TODO(1.9): raise a ValueError instead.
            warnings.warn(
                f"The column {feature!r} contains integer data. Partial "
                "dependence plots are not supported for integer data: this "
                "can lead to implicit rounding with NumPy arrays or even errors "
                "with newer pandas versions. Please convert numerical features "
                "to floating point dtypes ahead of time to avoid problems. "
                "This will raise ValueError in scikit-learn 1.9.",
                FutureWarning,
            )
            # Do not warn again for other features to avoid spamming the caller.
            break

    X_subset = _safe_indexing(X, features_indices, axis=1)

    # `_grid_from_X` works on `X_subset`, whose columns are numbered by their
    # position in `features`; re-key the user mapping accordingly.
    custom_values_for_X_subset = {
        index: custom_values.get(feature)
        for index, feature in enumerate(features)
        if feature in custom_values
    }

    grid, values = _grid_from_X(
        X_subset,
        percentiles,
        is_categorical,
        grid_resolution,
        custom_values_for_X_subset,
    )

    if method == "brute":
        averaged_predictions, predictions = _partial_dependence_brute(
            estimator, grid, features_indices, X, response_method, sample_weight
        )

        # reshape predictions to
        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
        predictions = predictions.reshape(
            -1, X.shape[0], *[val.shape[0] for val in values]
        )
    else:
        averaged_predictions = _partial_dependence_recursion(
            estimator, grid, features_indices
        )

    # reshape averaged_predictions to
    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(
        -1, *[val.shape[0] for val in values]
    )
    pdp_results = Bunch(grid_values=values)

    if kind == "average":
        pdp_results["average"] = averaged_predictions
    elif kind == "individual":
        pdp_results["individual"] = predictions
    else:  # kind='both'
        pdp_results["average"] = averaged_predictions
        pdp_results["individual"] = predictions

    return pdp_results
|
||||
@@ -0,0 +1,68 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
|
||||
def _check_feature_names(X, feature_names=None):
|
||||
"""Check feature names.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
Input data.
|
||||
|
||||
feature_names : None or array-like of shape (n_names,), dtype=str
|
||||
Feature names to check or `None`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
feature_names : list of str
|
||||
Feature names validated. If `feature_names` is `None`, then a list of
|
||||
feature names is provided, i.e. the column names of a pandas dataframe
|
||||
or a generic list of feature names (e.g. `["x0", "x1", ...]`) for a
|
||||
NumPy array.
|
||||
"""
|
||||
if feature_names is None:
|
||||
if hasattr(X, "columns") and hasattr(X.columns, "tolist"):
|
||||
# get the column names for a pandas dataframe
|
||||
feature_names = X.columns.tolist()
|
||||
else:
|
||||
# define a list of numbered indices for a numpy array
|
||||
feature_names = [f"x{i}" for i in range(X.shape[1])]
|
||||
elif hasattr(feature_names, "tolist"):
|
||||
# convert numpy array or pandas index to a list
|
||||
feature_names = feature_names.tolist()
|
||||
if len(set(feature_names)) != len(feature_names):
|
||||
raise ValueError("feature_names should not contain duplicates.")
|
||||
|
||||
return feature_names
|
||||
|
||||
|
||||
def _get_feature_index(fx, feature_names=None):
|
||||
"""Get feature index.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
fx : int or str
|
||||
Feature index or name.
|
||||
|
||||
feature_names : list of str, default=None
|
||||
All feature names from which to search the indices.
|
||||
|
||||
Returns
|
||||
-------
|
||||
idx : int
|
||||
Feature index.
|
||||
"""
|
||||
if isinstance(fx, str):
|
||||
if feature_names is None:
|
||||
raise ValueError(
|
||||
f"Cannot plot partial dependence for feature {fx!r} since "
|
||||
"the list of feature names was not provided, neither as "
|
||||
"column names of a pandas data-frame nor via the feature_names "
|
||||
"parameter."
|
||||
)
|
||||
try:
|
||||
return feature_names.index(fx)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Feature {fx!r} not in feature_names") from e
|
||||
return fx
|
||||
@@ -0,0 +1,313 @@
|
||||
"""Permutation importance for estimators."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numbers
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ..ensemble._bagging import _generate_indices
|
||||
from ..metrics import check_scoring, get_scorer_names
|
||||
from ..model_selection._validation import _aggregate_score_dicts
|
||||
from ..utils import Bunch, _safe_indexing, check_array, check_random_state
|
||||
from ..utils._param_validation import (
|
||||
HasMethods,
|
||||
Integral,
|
||||
Interval,
|
||||
RealNotInt,
|
||||
StrOptions,
|
||||
validate_params,
|
||||
)
|
||||
from ..utils.parallel import Parallel, delayed
|
||||
|
||||
|
||||
def _weights_scorer(scorer, estimator, X, y, sample_weight):
|
||||
if sample_weight is not None:
|
||||
return scorer(estimator, X, y, sample_weight=sample_weight)
|
||||
return scorer(estimator, X, y)
|
||||
|
||||
|
||||
def _calculate_permutation_scores(
    estimator,
    X,
    y,
    sample_weight,
    col_idx,
    random_state,
    n_repeats,
    scorer,
    max_samples,
):
    """Calculate score when `col_idx` is permuted.

    Parameters
    ----------
    estimator : object
        Estimator passed to `scorer`.

    X : ndarray or DataFrame
        Data to score on. Never modified: the function works on a copy or on
        a row subsample.

    y : array-like or None
        Targets passed to `scorer`; subsampled with the same rows as `X` when
        `max_samples < X.shape[0]`.

    sample_weight : array-like or None
        Weights passed to `scorer`; subsampled like `y` when not `None`.

    col_idx : int
        Positional index of the column to shuffle.

    random_state : int, RandomState instance or None
        Passed through `check_random_state`; drives both the row subsampling
        and the shuffling.

    n_repeats : int
        Number of shuffle-and-score rounds.

    scorer : callable
        Scorer invoked through `_weights_scorer`.

    max_samples : int
        Number of rows to draw (without replacement) when smaller than
        `X.shape[0]`.

    Returns
    -------
    scores : ndarray or dict
        Array of the `n_repeats` scores, or the output of
        `_aggregate_score_dicts` when the scorer returns dicts.
    """
    random_state = check_random_state(random_state)

    # Work on a copy of X to ensure thread-safety in case of threading based
    # parallelism. Furthermore, making a copy is also useful when the joblib
    # backend is 'loky' (default) or the old 'multiprocessing': in those cases,
    # if X is large it will automatically be backed by a readonly memory map
    # (memmap). X.copy() on the other hand is always guaranteed to return a
    # writable data-structure whose columns can be shuffled inplace.
    if max_samples < X.shape[0]:
        row_indices = _generate_indices(
            random_state=random_state,
            bootstrap=False,
            n_population=X.shape[0],
            n_samples=max_samples,
        )
        X_permuted = _safe_indexing(X, row_indices, axis=0)
        y = _safe_indexing(y, row_indices, axis=0)
        if sample_weight is not None:
            sample_weight = _safe_indexing(sample_weight, row_indices, axis=0)
    else:
        X_permuted = X.copy()

    scores = []
    # The same index array is re-shuffled in place on every repeat.
    shuffling_idx = np.arange(X_permuted.shape[0])
    for _ in range(n_repeats):
        random_state.shuffle(shuffling_idx)
        if hasattr(X_permuted, "iloc"):
            col = X_permuted.iloc[shuffling_idx, col_idx]
            # Restore the original index so the assignment below is positional
            # rather than aligned on the shuffled labels.
            col.index = X_permuted.index
            X_permuted[X_permuted.columns[col_idx]] = col
        else:
            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
        scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))

    if isinstance(scores[0], dict):
        scores = _aggregate_score_dicts(scores)
    else:
        scores = np.array(scores)

    return scores
|
||||
|
||||
|
||||
def _create_importances_bunch(baseline_score, permuted_score):
    """Compute the importances as the decrease in score.

    Parameters
    ----------
    baseline_score : ndarray of shape (n_features,)
        The baseline score without permutation. It is subtracted from with
        NumPy broadcasting, so anything broadcastable against
        `permuted_score` works.
        NOTE(review): the visible caller passes the scorer's output on the
        unpermuted data, which is presumably a scalar -- confirm.
    permuted_score : ndarray of shape (n_features, n_repeats)
        The permuted scores for the `n` repetitions.

    Returns
    -------
    importances : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.
        importances_mean : ndarray, shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray, shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray, shape (n_features, n_repeats)
            Raw permutation importance scores.
    """
    importances = baseline_score - permuted_score
    return Bunch(
        importances_mean=np.mean(importances, axis=1),
        importances_std=np.std(importances, axis=1),
        importances=importances,
    )
|
||||
|
||||
|
||||
@validate_params(
    {
        "estimator": [HasMethods(["fit"])],
        "X": ["array-like"],
        "y": ["array-like", None],
        "scoring": [
            StrOptions(set(get_scorer_names())),
            callable,
            list,
            tuple,
            dict,
            None,
        ],
        "n_repeats": [Interval(Integral, 1, None, closed="left")],
        "n_jobs": [Integral, None],
        "random_state": ["random_state"],
        "sample_weight": ["array-like", None],
        "max_samples": [
            Interval(Integral, 1, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="right"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def permutation_importance(
    estimator,
    X,
    y,
    *,
    scoring=None,
    n_repeats=5,
    n_jobs=None,
    random_state=None,
    sample_weight=None,
    max_samples=1.0,
):
    """Permutation importance for feature evaluation [BRE]_.

    The :term:`estimator` is required to be a fitted estimator. `X` can be the
    data set used to train the estimator or a hold-out set. The permutation
    importance of a feature is calculated as follows. First, a baseline metric,
    defined by :term:`scoring`, is evaluated on a (potentially different)
    dataset defined by the `X`. Next, a feature column from the validation set
    is permuted and the metric is evaluated again. The permutation importance
    is defined to be the difference between the baseline metric and metric from
    permuting the feature column.

    Read more in the :ref:`User Guide <permutation_importance>`.

    Parameters
    ----------
    estimator : object
        An estimator that has already been :term:`fitted` and is compatible
        with :term:`scorer`.

    X : ndarray or DataFrame, shape (n_samples, n_features)
        Data on which permutation importance will be computed.

    y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
        Targets for supervised or `None` for unsupervised.

    scoring : str, callable, list, tuple, or dict, default=None
        Scorer to use.
        If `scoring` represents a single score, one can use:

        - str: see :ref:`scoring_string_names` for options.
        - callable: a scorer callable object (e.g., function) with signature
          ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
        - `None`: the `estimator`'s
          :ref:`default evaluation criterion <scoring_api_overview>` is used.

        If `scoring` represents multiple scores, one can use:

        - a list or tuple of unique strings;
        - a callable returning a dictionary where the keys are the metric
          names and the values are the metric scores;
        - a dictionary with metric names as keys and callables as values.

        Passing multiple scores to `scoring` is more efficient than calling
        `permutation_importance` for each of the scores as it reuses
        predictions to avoid redundant computation.

    n_repeats : int, default=5
        Number of times to permute a feature.

    n_jobs : int or None, default=None
        Number of jobs to run in parallel. The computation is done by computing
        permutation score for each column and parallelized over the columns.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    random_state : int, RandomState instance, default=None
        Pseudo-random number generator to control the permutations of each
        feature.
        Pass an int to get reproducible results across function calls.
        See :term:`Glossary <random_state>`.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights used in scoring.

        .. versionadded:: 0.24

    max_samples : int or float, default=1.0
        The number of samples to draw from X to compute feature importance
        in each repeat (without replacement).

        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.
        - If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples
          will be used.

        While using this option may provide less accurate importance estimates,
        it keeps the method tractable when evaluating feature importance on
        large datasets. In combination with `n_repeats`, this allows to control
        the computational speed vs statistical accuracy trade-off of this method.

        .. versionadded:: 1.0

    Returns
    -------
    result : :class:`~sklearn.utils.Bunch` or dict of such instances
        Dictionary-like object, with the following attributes.

        importances_mean : ndarray of shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray of shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray of shape (n_features, n_repeats)
            Raw permutation importance scores.

        If there are multiple scoring metrics in the scoring parameter
        `result` is a dict with scorer names as keys (e.g. 'roc_auc') and
        `Bunch` objects like above as values.

    Raises
    ------
    ValueError
        If `max_samples` is an integer greater than `X.shape[0]`.

    References
    ----------
    .. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
             2001. <10.1023/A:1010933404324>`

    Examples
    --------
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.inspection import permutation_importance
    >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
    ...      [0, 9, 9],[0, 9, 9],[0, 9, 9]]
    >>> y = [1, 1, 1, 0, 0, 0]
    >>> clf = LogisticRegression().fit(X, y)
    >>> result = permutation_importance(clf, X, y, n_repeats=10,
    ...                                 random_state=0)
    >>> result.importances_mean
    array([0.4666, 0. , 0. ])
    >>> result.importances_std
    array([0.2211, 0. , 0. ])
    """
    # DataFrames (anything exposing `.iloc`) are kept as-is; other inputs are
    # converted to an array, with NaN allowed and the dtype left untouched.
    if not hasattr(X, "iloc"):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=None)

    # Precompute random seed from the random state to be used
    # to get a fresh independent RandomState instance for each
    # parallel call to _calculate_permutation_scores, irrespective of
    # the fact that variables are shared or not depending on the active
    # joblib backend (sequential, thread-based or process-based).
    random_state = check_random_state(random_state)
    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)

    # A non-integral `max_samples` is a fraction of the rows; convert it to an
    # absolute (truncated) row count.
    if not isinstance(max_samples, numbers.Integral):
        max_samples = int(max_samples * X.shape[0])
    elif max_samples > X.shape[0]:
        raise ValueError("max_samples must be <= n_samples")

    scorer = check_scoring(estimator, scoring=scoring)
    baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)

    # One task per column; every task receives the same integer seed.
    scores = Parallel(n_jobs=n_jobs)(
        delayed(_calculate_permutation_scores)(
            estimator,
            X,
            y,
            sample_weight,
            col_idx,
            random_seed,
            n_repeats,
            scorer,
            max_samples,
        )
        for col_idx in range(X.shape[1])
    )

    if isinstance(baseline_score, dict):
        return {
            name: _create_importances_bunch(
                baseline_score[name],
                # unpack the permuted scores
                np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
            )
            for name in baseline_score
        }
    else:
        return _create_importances_bunch(baseline_score, np.array(scores))
|
||||
@@ -0,0 +1,2 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,564 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ...base import is_regressor
|
||||
from ...preprocessing import LabelEncoder
|
||||
from ...utils import _safe_indexing
|
||||
from ...utils._optional_dependencies import check_matplotlib_support
|
||||
from ...utils._response import _get_response_values
|
||||
from ...utils._set_output import _get_adapter_from_container
|
||||
from ...utils.validation import (
|
||||
_is_arraylike_not_scalar,
|
||||
_is_pandas_df,
|
||||
_is_polars_df,
|
||||
_num_features,
|
||||
check_is_fitted,
|
||||
)
|
||||
|
||||
|
||||
def _check_boundary_response_method(estimator, response_method, class_of_interest):
    """Validate the response methods to be used with the fitted estimator.

    Parameters
    ----------
    estimator : object
        Fitted estimator to check.

    response_method : {'auto', 'decision_function', 'predict_proba', 'predict'}
        Specifies whether to use :term:`decision_function`, :term:`predict_proba`,
        :term:`predict` as the target response. If set to 'auto', the response method is
        tried in the before mentioned order.

    class_of_interest : int, float, bool, str or None
        The class considered when plotting the decision. Cannot be None if
        multiclass and `response_method` is 'predict_proba' or 'decision_function'.
        NOTE(review): this argument is not read by the body of this helper,
        so the constraint above is not enforced here.

        .. versionadded:: 1.4

    Returns
    -------
    prediction_method : list of str or str
        The name or list of names of the response methods to use.

    Raises
    ------
    ValueError
        If `estimator.classes_[0]` is itself array-like, i.e. for multi-label
        and multi-output multi-class classifiers.
    """
    has_classes = hasattr(estimator, "classes_")
    if has_classes and _is_arraylike_not_scalar(estimator.classes_[0]):
        msg = "Multi-label and multi-output multi-class classifiers are not supported"
        raise ValueError(msg)

    if response_method == "auto":
        if is_regressor(estimator):
            prediction_method = "predict"
        else:
            prediction_method = ["decision_function", "predict_proba", "predict"]
    else:
        prediction_method = response_method

    return prediction_method
|
||||
|
||||
|
||||
class DecisionBoundaryDisplay:
|
||||
"""Decisions boundary visualization.
|
||||
|
||||
It is recommended to use
|
||||
:func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
|
||||
to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
|
||||
attributes.
|
||||
|
||||
Read more in the :ref:`User Guide <visualizations>`.
|
||||
|
||||
For a detailed example comparing the decision boundaries of multinomial and
|
||||
one-vs-rest logistic regression, please see
|
||||
:ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py`.
|
||||
|
||||
.. versionadded:: 1.1
|
||||
|
||||
Parameters
|
||||
----------
|
||||
xx0 : ndarray of shape (grid_resolution, grid_resolution)
|
||||
First output of :func:`meshgrid <numpy.meshgrid>`.
|
||||
|
||||
xx1 : ndarray of shape (grid_resolution, grid_resolution)
|
||||
Second output of :func:`meshgrid <numpy.meshgrid>`.
|
||||
|
||||
response : ndarray of shape (grid_resolution, grid_resolution) or \
|
||||
(grid_resolution, grid_resolution, n_classes)
|
||||
Values of the response function.
|
||||
|
||||
multiclass_colors : list of str or str, default=None
|
||||
Specifies how to color each class when plotting all classes of multiclass
|
||||
problem. Ignored for binary problems and multiclass problems when plotting a
|
||||
single prediction value per point.
|
||||
Possible inputs are:
|
||||
|
||||
* list: list of Matplotlib
|
||||
`color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
|
||||
strings, of length `n_classes`
|
||||
* str: name of :class:`matplotlib.colors.Colormap`
|
||||
* None: 'tab10' colormap is used to sample colors if the number of classes is less than or equal to 10, otherwise 'gist_rainbow' colormap.
|
||||
|
||||
Single color colormaps will be generated from the colors in the list or
|
||||
colors taken from the colormap and passed to the `cmap` parameter of
|
||||
the `plot_method`.
|
||||
|
||||
.. versionadded:: 1.7
|
||||
|
||||
xlabel : str, default=None
|
||||
Default label to place on x axis.
|
||||
|
||||
ylabel : str, default=None
|
||||
Default label to place on y axis.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
surface_ : matplotlib `QuadContourSet` or `QuadMesh` or list of such objects
|
||||
If `plot_method` is 'contour' or 'contourf', `surface_` is
|
||||
:class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
|
||||
`plot_method` is 'pcolormesh', `surface_` is
|
||||
:class:`QuadMesh <matplotlib.collections.QuadMesh>`.
|
||||
|
||||
multiclass_colors_ : array of shape (n_classes, 4)
|
||||
Colors used to plot each class in multiclass problems.
|
||||
Only defined when `class_of_interest` is None.
|
||||
|
||||
.. versionadded:: 1.7
|
||||
|
||||
ax_ : matplotlib Axes
|
||||
Axes with decision boundary.
|
||||
|
||||
figure_ : matplotlib Figure
|
||||
Figure containing the decision boundary.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.datasets import load_iris
|
||||
>>> from sklearn.inspection import DecisionBoundaryDisplay
|
||||
>>> from sklearn.tree import DecisionTreeClassifier
|
||||
>>> iris = load_iris()
|
||||
>>> feature_1, feature_2 = np.meshgrid(
|
||||
... np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),
|
||||
... np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())
|
||||
... )
|
||||
>>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T
|
||||
>>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)
|
||||
>>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)
|
||||
>>> display = DecisionBoundaryDisplay(
|
||||
... xx0=feature_1, xx1=feature_2, response=y_pred
|
||||
... )
|
||||
>>> display.plot()
|
||||
<...>
|
||||
>>> display.ax_.scatter(
|
||||
... iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor="black"
|
||||
... )
|
||||
<...>
|
||||
>>> plt.show()
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self, *, xx0, xx1, response, multiclass_colors=None, xlabel=None, ylabel=None
|
||||
):
|
||||
self.xx0 = xx0
|
||||
self.xx1 = xx1
|
||||
self.response = response
|
||||
self.multiclass_colors = multiclass_colors
|
||||
self.xlabel = xlabel
|
||||
self.ylabel = ylabel
|
||||
|
||||
def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
    """Plot visualization.

    Parameters
    ----------
    plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
        Plotting method to call when plotting the response. Please refer
        to the following matplotlib documentation for details:
        :func:`contourf <matplotlib.pyplot.contourf>`,
        :func:`contour <matplotlib.pyplot.contour>`,
        :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.

    ax : Matplotlib axes, default=None
        Axes object to plot on. If `None`, a new figure and axes is
        created.

    xlabel : str, default=None
        Overwrite the x-axis label.

    ylabel : str, default=None
        Overwrite the y-axis label.

    **kwargs : dict
        Additional keyword arguments to be passed to the `plot_method`.

    Returns
    -------
    display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
        Object that stores computed values.

    Raises
    ------
    ValueError
        If `plot_method` is not one of the supported names, or if the
        response is not 2-dimensional and `multiclass_colors` is neither
        `None`, a str nor a list.

    Warns
    -----
    UserWarning
        If `cmap` or `colors` is passed in `kwargs` while the response is not
        2-dimensional; the offending keyword is dropped.
    """
    check_matplotlib_support("DecisionBoundaryDisplay.plot")
    import matplotlib as mpl
    import matplotlib.pyplot as plt

    if plot_method not in ("contourf", "contour", "pcolormesh"):
        raise ValueError(
            "plot_method must be 'contourf', 'contour', or 'pcolormesh'. "
            f"Got {plot_method} instead."
        )

    if ax is None:
        _, ax = plt.subplots()

    plot_func = getattr(ax, plot_method)
    if self.response.ndim == 2:
        self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)
    else:  # self.response.ndim == 3
        n_responses = self.response.shape[-1]
        for kwarg in ("cmap", "colors"):
            if kwarg in kwargs:
                warnings.warn(
                    f"'{kwarg}' is ignored in favor of 'multiclass_colors' "
                    "in the multiclass case when the response method is "
                    "'decision_function' or 'predict_proba'."
                )
                del kwargs[kwarg]

        if self.multiclass_colors is None or isinstance(
            self.multiclass_colors, str
        ):
            if self.multiclass_colors is None:
                cmap = "tab10" if n_responses <= 10 else "gist_rainbow"
            else:
                cmap = self.multiclass_colors

            # Special case for the tab10 and tab20 colormaps that encode a
            # discrete set of colors that are easily distinguishable
            # contrary to other colormaps that are continuous.
            if cmap == "tab10" and n_responses <= 10:
                colors = plt.get_cmap("tab10", 10).colors[:n_responses]
            elif cmap == "tab20" and n_responses <= 20:
                colors = plt.get_cmap("tab20", 20).colors[:n_responses]
            else:
                cmap = plt.get_cmap(cmap, n_responses)
                if not hasattr(cmap, "colors"):
                    # For LinearSegmentedColormap
                    colors = cmap(np.linspace(0, 1, n_responses))
                else:
                    colors = cmap.colors
        elif isinstance(self.multiclass_colors, list):
            colors = [mpl.colors.to_rgba(color) for color in self.multiclass_colors]
        else:
            raise ValueError("'multiclass_colors' must be a list or a str.")

        self.multiclass_colors_ = colors

        # Index of the highest response at each grid point. Computed once
        # here rather than once per class in the loop below.
        class_map = self.response.argmax(axis=2)
        if plot_method == "contour":
            # Plot only argmax map for contour
            self.surface_ = plot_func(
                self.xx0, self.xx1, class_map, colors=colors, **kwargs
            )
        else:
            # One white-to-class-color colormap per class.
            multiclass_cmaps = [
                mpl.colors.LinearSegmentedColormap.from_list(
                    f"colormap_{class_idx}", [(1.0, 1.0, 1.0, 1.0), (r, g, b, 1.0)]
                )
                for class_idx, (r, g, b, _) in enumerate(colors)
            ]

            self.surface_ = []
            for class_idx, cmap in enumerate(multiclass_cmaps):
                # Mask every grid point where this class is not the argmax.
                response = np.ma.array(
                    self.response[:, :, class_idx],
                    mask=class_map != class_idx,
                )
                self.surface_.append(
                    plot_func(self.xx0, self.xx1, response, cmap=cmap, **kwargs)
                )

    # An explicit label always wins; otherwise the stored default is applied
    # only when the axes has no label yet.
    if xlabel is not None or not ax.get_xlabel():
        xlabel = self.xlabel if xlabel is None else xlabel
        ax.set_xlabel(xlabel)
    if ylabel is not None or not ax.get_ylabel():
        ylabel = self.ylabel if ylabel is None else ylabel
        ax.set_ylabel(ylabel)

    self.ax_ = ax
    self.figure_ = ax.figure
    return self
|
||||
|
||||
@classmethod
|
||||
def from_estimator(
|
||||
cls,
|
||||
estimator,
|
||||
X,
|
||||
*,
|
||||
grid_resolution=100,
|
||||
eps=1.0,
|
||||
plot_method="contourf",
|
||||
response_method="auto",
|
||||
class_of_interest=None,
|
||||
multiclass_colors=None,
|
||||
xlabel=None,
|
||||
ylabel=None,
|
||||
ax=None,
|
||||
**kwargs,
|
||||
):
|
||||
"""Plot decision boundary given an estimator.
|
||||
|
||||
Read more in the :ref:`User Guide <visualizations>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
estimator : object
|
||||
Trained estimator used to plot the decision boundary.
|
||||
|
||||
X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
|
||||
Input data that should be only 2-dimensional.
|
||||
|
||||
grid_resolution : int, default=100
|
||||
Number of grid points to use for plotting decision boundary.
|
||||
Higher values will make the plot look nicer but be slower to
|
||||
render.
|
||||
|
||||
eps : float, default=1.0
|
||||
Extends the minimum and maximum values of X for evaluating the
|
||||
response function.
|
||||
|
||||
plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
|
||||
Plotting method to call when plotting the response. Please refer
|
||||
to the following matplotlib documentation for details:
|
||||
:func:`contourf <matplotlib.pyplot.contourf>`,
|
||||
:func:`contour <matplotlib.pyplot.contour>`,
|
||||
:func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
|
||||
|
||||
response_method : {'auto', 'decision_function', 'predict_proba', \
|
||||
'predict'}, default='auto'
|
||||
Specifies whether to use :term:`decision_function`,
|
||||
:term:`predict_proba` or :term:`predict` as the target response.
|
||||
If set to 'auto', the response method is tried in the order as
|
||||
listed above.
|
||||
|
||||
.. versionchanged:: 1.6
|
||||
For multiclass problems, 'auto' no longer defaults to 'predict'.
|
||||
|
||||
class_of_interest : int, float, bool or str, default=None
|
||||
The class to be plotted when `response_method` is 'predict_proba'
|
||||
or 'decision_function'. If None, `estimator.classes_[1]` is considered
|
||||
the positive class for binary classifiers. For multiclass
|
||||
classifiers, if None, all classes will be represented in the
|
||||
decision boundary plot; the class with the highest response value
|
||||
at each point is plotted. The color of each class can be set via
|
||||
`multiclass_colors`.
|
||||
|
||||
.. versionadded:: 1.4
|
||||
|
||||
multiclass_colors : list of str, or str, default=None
|
||||
Specifies how to color each class when plotting multiclass
|
||||
'predict_proba' or 'decision_function' and `class_of_interest` is
|
||||
None. Ignored in all other cases.
|
||||
|
||||
Possible inputs are:
|
||||
|
||||
* list: list of Matplotlib
|
||||
`color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
|
||||
strings, of length `n_classes`
|
||||
* str: name of :class:`matplotlib.colors.Colormap`
|
||||
* None: 'tab10' colormap is used to sample colors if the number of
|
||||
classes is less than or equal to 10, otherwise 'gist_rainbow'
|
||||
colormap.
|
||||
|
||||
Single color colormaps will be generated from the colors in the list or
|
||||
colors taken from the colormap, and passed to the `cmap` parameter of
|
||||
the `plot_method`.
|
||||
|
||||
.. versionadded:: 1.7
|
||||
|
||||
xlabel : str, default=None
|
||||
The label used for the x-axis. If `None`, an attempt is made to
|
||||
extract a label from `X` if it is a dataframe, otherwise an empty
|
||||
string is used.
|
||||
|
||||
ylabel : str, default=None
|
||||
The label used for the y-axis. If `None`, an attempt is made to
|
||||
extract a label from `X` if it is a dataframe, otherwise an empty
|
||||
string is used.
|
||||
|
||||
ax : Matplotlib axes, default=None
|
||||
Axes object to plot on. If `None`, a new figure and axes is
|
||||
created.
|
||||
|
||||
**kwargs : dict
|
||||
Additional keyword arguments to be passed to the
|
||||
`plot_method`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
|
||||
Object that stores the result.
|
||||
|
||||
See Also
|
||||
--------
|
||||
DecisionBoundaryDisplay : Decision boundary visualization.
|
||||
sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the
|
||||
confusion matrix given an estimator, the data, and the label.
|
||||
sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the
|
||||
confusion matrix given the true and predicted labels.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import matplotlib.pyplot as plt
|
||||
>>> from sklearn.datasets import load_iris
|
||||
>>> from sklearn.linear_model import LogisticRegression
|
||||
>>> from sklearn.inspection import DecisionBoundaryDisplay
|
||||
>>> iris = load_iris()
|
||||
>>> X = iris.data[:, :2]
|
||||
>>> classifier = LogisticRegression().fit(X, iris.target)
|
||||
>>> disp = DecisionBoundaryDisplay.from_estimator(
|
||||
... classifier, X, response_method="predict",
|
||||
... xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
|
||||
... alpha=0.5,
|
||||
... )
|
||||
>>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
|
||||
<...>
|
||||
>>> plt.show()
|
||||
"""
|
||||
check_matplotlib_support(f"{cls.__name__}.from_estimator")
|
||||
check_is_fitted(estimator)
|
||||
import matplotlib as mpl
|
||||
|
||||
if not grid_resolution > 1:
|
||||
raise ValueError(
|
||||
"grid_resolution must be greater than 1. Got"
|
||||
f" {grid_resolution} instead."
|
||||
)
|
||||
|
||||
if not eps >= 0:
|
||||
raise ValueError(
|
||||
f"eps must be greater than or equal to 0. Got {eps} instead."
|
||||
)
|
||||
|
||||
possible_plot_methods = ("contourf", "contour", "pcolormesh")
|
||||
if plot_method not in possible_plot_methods:
|
||||
available_methods = ", ".join(possible_plot_methods)
|
||||
raise ValueError(
|
||||
f"plot_method must be one of {available_methods}. "
|
||||
f"Got {plot_method} instead."
|
||||
)
|
||||
|
||||
num_features = _num_features(X)
|
||||
if num_features != 2:
|
||||
raise ValueError(
|
||||
f"n_features must be equal to 2. Got {num_features} instead."
|
||||
)
|
||||
|
||||
if (
|
||||
response_method in ("predict_proba", "decision_function", "auto")
|
||||
and multiclass_colors is not None
|
||||
and hasattr(estimator, "classes_")
|
||||
and (n_classes := len(estimator.classes_)) > 2
|
||||
):
|
||||
if isinstance(multiclass_colors, list):
|
||||
if len(multiclass_colors) != n_classes:
|
||||
raise ValueError(
|
||||
"When 'multiclass_colors' is a list, it must be of the same "
|
||||
f"length as 'estimator.classes_' ({n_classes}), got: "
|
||||
f"{len(multiclass_colors)}."
|
||||
)
|
||||
elif any(
|
||||
not mpl.colors.is_color_like(col) for col in multiclass_colors
|
||||
):
|
||||
raise ValueError(
|
||||
"When 'multiclass_colors' is a list, it can only contain valid"
|
||||
f" Matplotlib color names. Got: {multiclass_colors}"
|
||||
)
|
||||
if isinstance(multiclass_colors, str):
|
||||
if multiclass_colors not in mpl.pyplot.colormaps():
|
||||
raise ValueError(
|
||||
"When 'multiclass_colors' is a string, it must be a valid "
|
||||
f"Matplotlib colormap. Got: {multiclass_colors}"
|
||||
)
|
||||
|
||||
x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)
|
||||
|
||||
x0_min, x0_max = x0.min() - eps, x0.max() + eps
|
||||
x1_min, x1_max = x1.min() - eps, x1.max() + eps
|
||||
|
||||
xx0, xx1 = np.meshgrid(
|
||||
np.linspace(x0_min, x0_max, grid_resolution),
|
||||
np.linspace(x1_min, x1_max, grid_resolution),
|
||||
)
|
||||
|
||||
X_grid = np.c_[xx0.ravel(), xx1.ravel()]
|
||||
if _is_pandas_df(X) or _is_polars_df(X):
|
||||
adapter = _get_adapter_from_container(X)
|
||||
X_grid = adapter.create_container(
|
||||
X_grid,
|
||||
X_grid,
|
||||
columns=X.columns,
|
||||
)
|
||||
|
||||
prediction_method = _check_boundary_response_method(
|
||||
estimator, response_method, class_of_interest
|
||||
)
|
||||
try:
|
||||
response, _, response_method_used = _get_response_values(
|
||||
estimator,
|
||||
X_grid,
|
||||
response_method=prediction_method,
|
||||
pos_label=class_of_interest,
|
||||
return_response_method_used=True,
|
||||
)
|
||||
except ValueError as exc:
|
||||
if "is not a valid label" in str(exc):
|
||||
# re-raise a more informative error message since `pos_label` is unknown
|
||||
# to our user when interacting with
|
||||
# `DecisionBoundaryDisplay.from_estimator`
|
||||
raise ValueError(
|
||||
f"class_of_interest={class_of_interest} is not a valid label: It "
|
||||
f"should be one of {estimator.classes_}"
|
||||
) from exc
|
||||
raise
|
||||
|
||||
# convert classes predictions into integers
|
||||
if response_method_used == "predict" and hasattr(estimator, "classes_"):
|
||||
encoder = LabelEncoder()
|
||||
encoder.classes_ = estimator.classes_
|
||||
response = encoder.transform(response)
|
||||
|
||||
if response.ndim == 1:
|
||||
response = response.reshape(*xx0.shape)
|
||||
else:
|
||||
if is_regressor(estimator):
|
||||
raise ValueError("Multi-output regressors are not supported")
|
||||
|
||||
if class_of_interest is not None:
|
||||
# For the multiclass case, `_get_response_values` returns the response
|
||||
# as-is. Thus, we have a column per class and we need to select the
|
||||
# column corresponding to the positive class.
|
||||
col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
|
||||
response = response[:, col_idx].reshape(*xx0.shape)
|
||||
else:
|
||||
response = response.reshape(*xx0.shape, response.shape[-1])
|
||||
|
||||
if xlabel is None:
|
||||
xlabel = X.columns[0] if hasattr(X, "columns") else ""
|
||||
|
||||
if ylabel is None:
|
||||
ylabel = X.columns[1] if hasattr(X, "columns") else ""
|
||||
|
||||
display = cls(
|
||||
xx0=xx0,
|
||||
xx1=xx1,
|
||||
response=response,
|
||||
multiclass_colors=multiclass_colors,
|
||||
xlabel=xlabel,
|
||||
ylabel=ylabel,
|
||||
)
|
||||
return display.plot(ax=ax, plot_method=plot_method, **kwargs)
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,710 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.base import BaseEstimator, ClassifierMixin
|
||||
from sklearn.datasets import (
|
||||
load_diabetes,
|
||||
load_iris,
|
||||
make_classification,
|
||||
make_multilabel_classification,
|
||||
)
|
||||
from sklearn.ensemble import IsolationForest
|
||||
from sklearn.inspection import DecisionBoundaryDisplay
|
||||
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.preprocessing import scale
|
||||
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
||||
from sklearn.utils._testing import (
|
||||
_convert_container,
|
||||
assert_allclose,
|
||||
assert_array_equal,
|
||||
)
|
||||
from sklearn.utils.fixes import parse_version
|
||||
|
||||
# Shared two-feature classification dataset used by the `fitted_clf` fixture
# and by the tests in this module that do not build their own data.
X, y = make_classification(
    n_features=2,
    n_informative=1,
    n_redundant=1,
    n_clusters_per_class=1,
    random_state=42,
)
|
||||
|
||||
|
||||
def load_iris_2d_scaled():
    """Load iris, apply `scale` to all features and keep the first two columns.

    Returns
    -------
    (features, target) : tuple
        The first two scaled feature columns and the iris target.
    """
    features, target = load_iris(return_X_y=True)
    # Scaling happens on the full matrix before the two columns are selected.
    scaled_features = scale(features)
    return scaled_features[:, :2], target
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def fitted_clf():
    """Module-scoped `LogisticRegression` fitted on the module-level `X`, `y`."""
    classifier = LogisticRegression()
    classifier.fit(X, y)
    return classifier
|
||||
|
||||
|
||||
def test_input_data_dimension(pyplot):
    """Check that an error is raised when `X` does not have exactly 2 features."""
    data, target = make_classification(n_samples=10, n_features=4, random_state=0)
    classifier = LogisticRegression().fit(data, target)

    expected_msg = "n_features must be equal to 2. Got 4 instead."
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(estimator=classifier, X=data)
|
||||
|
||||
|
||||
def test_check_boundary_response_method_error():
    """Check that `_check_boundary_response_method` raises for an estimator
    whose `classes_` is a list of arrays (multi-label / multi-output case).
    """

    class MultiLabelClassifier:
        # A list of per-output class arrays instead of a single array.
        classes_ = [np.array([0, 1]), np.array([0, 1])]

    estimator = MultiLabelClassifier()
    err_msg = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=err_msg):
        _check_boundary_response_method(estimator, "predict", None)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "estimator, response_method, class_of_interest, expected_prediction_method",
    [
        # Regressor: only `predict` is ever selected.
        (DecisionTreeRegressor(), "predict", None, "predict"),
        (DecisionTreeRegressor(), "auto", None, "predict"),
        # Classifier fitted on the three-class iris data.
        (LogisticRegression().fit(*load_iris_2d_scaled()), "predict", None, "predict"),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "predict_proba",
            0,
            "predict_proba",
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "decision_function",
            0,
            "decision_function",
        ),
        # Classifier fitted on the module-level `X`, `y`.
        (
            LogisticRegression().fit(X, y),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (LogisticRegression().fit(X, y), "predict", None, "predict"),
        (
            LogisticRegression().fit(X, y),
            ["predict_proba", "decision_function"],
            None,
            ["predict_proba", "decision_function"],
        ),
    ],
)
def test_check_boundary_response_method(
    estimator, response_method, class_of_interest, expected_prediction_method
):
    """Check the value returned by `_check_boundary_response_method` for the
    supported estimator / `response_method` / `class_of_interest` combinations.
    """
    resolved_method = _check_boundary_response_method(
        estimator, response_method, class_of_interest
    )
    assert resolved_method == expected_prediction_method
|
||||
|
||||
|
||||
def test_multiclass_predict(pyplot):
    """Check that `response_method="predict"` on a 3-class problem stores the
    expected grid and the expected predictions on that grid.
    """
    eps = 1.0
    grid_resolution = 10

    X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
    X = X[:, [0, 1]]
    lr = LogisticRegression(random_state=0).fit(X, y)

    disp = DecisionBoundaryDisplay.from_estimator(
        lr, X, response_method="predict", grid_resolution=grid_resolution, eps=eps
    )

    # Rebuild the evaluation grid by hand: one linspace per feature, padded
    # by `eps` on each side.
    axis_values = [
        np.linspace(column.min() - eps, column.max() + eps, grid_resolution)
        for column in (X[:, 0], X[:, 1])
    ]
    xx0, xx1 = np.meshgrid(*axis_values)
    expected_response = lr.predict(np.c_[xx0.ravel(), xx1.ravel()])

    assert_allclose(disp.response, expected_response.reshape(xx0.shape))
    assert_allclose(disp.xx0, xx0)
    assert_allclose(disp.xx1, xx1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"plot_method": "hello_world"},
            r"plot_method must be one of contourf, contour, pcolormesh. Got hello_world"
            r" instead.",
        ),
        (
            {"grid_resolution": 1},
            r"grid_resolution must be greater than 1. Got 1 instead",
        ),
        (
            {"grid_resolution": -1},
            r"grid_resolution must be greater than 1. Got -1 instead",
        ),
        ({"eps": -1.1}, r"eps must be greater than or equal to 0. Got -1.1 instead"),
    ],
)
def test_input_validation_errors(pyplot, kwargs, error_msg, fitted_clf):
    """Check that invalid `from_estimator` arguments raise a `ValueError`
    whose message matches `error_msg`.
    """
    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(fitted_clf, X, **kwargs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"multiclass_colors": {"dict": "not_list"}},
            "'multiclass_colors' must be a list or a str.",
        ),
        ({"multiclass_colors": "not_cmap"}, "it must be a valid Matplotlib colormap"),
        ({"multiclass_colors": ["red", "green"]}, "it must be of the same length"),
        (
            {"multiclass_colors": ["red", "green", "not color"]},
            "it can only contain valid Matplotlib color names",
        ),
    ],
)
def test_input_validation_errors_multiclass_colors(pyplot, kwargs, error_msg):
    """Check that invalid `multiclass_colors` values passed to `from_estimator`
    raise a `ValueError` on a classifier fitted on the iris data.
    """
    iris_X, iris_y = load_iris_2d_scaled()
    classifier = LogisticRegression().fit(iris_X, iris_y)

    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(classifier, iris_X, **kwargs)
|
||||
|
||||
|
||||
def test_display_plot_input_error(pyplot, fitted_clf):
    """Check that `plot` rejects an unknown `plot_method`."""
    display = DecisionBoundaryDisplay.from_estimator(fitted_clf, X, grid_resolution=5)

    expected_msg = "plot_method must be 'contourf'"
    with pytest.raises(ValueError, match=expected_msg):
        display.plot(plot_method="hello_world")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method", ["auto", "predict", "predict_proba", "decision_function"]
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_classifier(
    pyplot, fitted_clf, response_method, plot_method
):
    """Check the surface type, axes/figure bookkeeping and grid extent of a
    display built from a classifier, then re-plot it with `pcolormesh`.
    """
    eps = 2.0
    first_fig, first_ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
        ax=first_ax,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == first_ax
    assert disp.figure_ == first_fig

    # Each grid must span the matching feature range padded by `eps`.
    for grid, column in ((disp.xx0, X[:, 0]), (disp.xx1, X[:, 1])):
        assert grid.min() == pytest.approx(column.min() - eps)
        assert grid.max() == pytest.approx(column.max() + eps)

    # Re-plot on fresh axes with a different plotting method.
    second_fig, second_ax = pyplot.subplots()
    disp.plot(plot_method="pcolormesh", ax=second_ax, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == second_ax
    assert disp.figure_ == second_fig
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
    pyplot, response_method, plot_method
):
    """Check the surface type, axes/figure bookkeeping and grid extent of a
    display built from an `IsolationForest`.
    """
    eps = 2.0
    figure, axes = pyplot.subplots()
    detector = IsolationForest(random_state=0).fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        detector,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
        ax=axes,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == axes
    assert disp.figure_ == figure

    # Each grid must span the matching feature range padded by `eps`.
    for grid, column in ((disp.xx0, X[:, 0]), (disp.xx1, X[:, 1])):
        assert grid.min() == pytest.approx(column.min() - eps)
        assert grid.max() == pytest.approx(column.max() + eps)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
    """Check that a display can be built from a regressor and re-plotted with
    `pcolormesh`.
    """
    eps = 2.0
    data, target = load_diabetes(return_X_y=True)
    data = data[:, :2]
    regressor = DecisionTreeRegressor().fit(data, target)

    first_fig, first_ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(
        regressor,
        data,
        response_method=response_method,
        ax=first_ax,
        eps=eps,
        plot_method=plot_method,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == first_ax
    assert disp.figure_ == first_fig

    # Each grid must span the matching feature range padded by `eps`.
    for grid, column in ((disp.xx0, data[:, 0]), (disp.xx1, data[:, 1])):
        assert grid.min() == pytest.approx(column.min() - eps)
        assert grid.max() == pytest.approx(column.max() + eps)

    # Re-plot on fresh axes with a different plotting method.
    second_fig, second_ax = pyplot.subplots()
    disp.plot(plot_method="pcolormesh", ax=second_ax, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == second_ax
    assert disp.figure_ == second_fig
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method, msg",
    [
        (
            "predict_proba",
            "MyClassifier has none of the following attributes: predict_proba",
        ),
        (
            "decision_function",
            "MyClassifier has none of the following attributes: decision_function",
        ),
        (
            "auto",
            (
                "MyClassifier has none of the following attributes: decision_function, "
                "predict_proba, predict"
            ),
        ),
        (
            "bad_method",
            "MyClassifier has none of the following attributes: bad_method",
        ),
    ],
)
def test_error_bad_response(pyplot, response_method, msg):
    """Check that an `AttributeError` is raised when the classifier exposes
    none of the requested response methods.
    """

    class MyClassifier(ClassifierMixin, BaseEstimator):
        # Deliberately defines no `predict*` / `decision_function` method.
        def fit(self, X, y):
            self.fitted_ = True
            self.classes_ = [0, 1]
            return self

    classifier = MyClassifier().fit(X, y)

    with pytest.raises(AttributeError, match=msg):
        DecisionBoundaryDisplay.from_estimator(
            classifier, X, response_method=response_method
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multilabel_classifier_error(pyplot, response_method):
    """Check that a classifier fitted on a multilabel target is rejected."""
    data, target = make_multilabel_classification(random_state=0)
    data = data[:, :2]
    classifier = DecisionTreeClassifier().fit(data, target)

    expected_msg = (
        "Multi-label and multi-output multi-class classifiers are not supported"
    )
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(
            classifier, data, response_method=response_method
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multi_output_multi_class_classifier_error(pyplot, response_method):
    """Check that a classifier fitted on a two-column string target is rejected."""
    data = np.asarray([[0, 1], [1, 2]])
    target = np.asarray([["tree", "cat"], ["cat", "tree"]])
    classifier = DecisionTreeClassifier().fit(data, target)

    expected_msg = (
        "Multi-label and multi-output multi-class classifiers are not supported"
    )
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(
            classifier, data, response_method=response_method
        )
|
||||
|
||||
|
||||
def test_multioutput_regressor_error(pyplot):
    """Check that a regressor fitted on a two-column target is rejected."""
    data = np.asarray([[0, 1], [1, 2]])
    target = np.asarray([[0, 1], [4, 1]])
    regressor = DecisionTreeRegressor().fit(data, target)

    expected_msg = "Multi-output regressors are not supported"
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(
            regressor, data, response_method="predict"
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method",
    ["predict_proba", "decision_function", ["predict_proba", "predict"]],
)
def test_regressor_unsupported_response(pyplot, response_method):
    """Check that a regressor raises a `ValueError` when asked for a
    classifier-only `response_method`.
    """
    data, target = load_diabetes(return_X_y=True)
    data = data[:, :2]
    regressor = DecisionTreeRegressor().fit(data, target)

    err_msg = "should either be a classifier to be used with response_method"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(
            regressor, data, response_method=response_method
        )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(
    # The classifier fixture is fitted on a NumPy array while the display is
    # given a dataframe, so this feature-name warning is expected.
    "ignore:X has feature names, but LogisticRegression was fitted without"
)
def test_dataframe_labels_used(pyplot, fitted_clf):
    """Check how pandas column names and explicit labels end up on the axes."""
    pd = pytest.importorskip("pandas")
    frame = pd.DataFrame(X, columns=["col_x", "col_y"])

    def axis_labels(axes):
        # Small helper so each scenario compares both labels at once.
        return axes.get_xlabel(), axes.get_ylabel()

    # By default the dataframe column names become the axis labels.
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, frame, ax=ax)
    assert axis_labels(ax) == ("col_x", "col_y")

    # Plotting the same display again on new axes reuses those names.
    _, ax = pyplot.subplots()
    disp.plot(ax=ax)
    assert axis_labels(ax) == ("col_x", "col_y")

    # Axes that already carry labels keep them.
    _, ax = pyplot.subplots()
    ax.set(xlabel="hello", ylabel="world")
    disp.plot(ax=ax)
    assert axis_labels(ax) == ("hello", "world")

    # Labels are replaced only when given explicitly to `plot`.
    disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
    assert axis_labels(ax) == ("overwritten_x", "overwritten_y")

    # Labels given to `from_estimator` take precedence over column names.
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf, frame, ax=ax, xlabel="overwritten_x", ylabel="overwritten_y"
    )
    assert axis_labels(ax) == ("overwritten_x", "overwritten_y")
|
||||
|
||||
|
||||
def test_string_target(pyplot):
    """Check that `from_estimator` runs for a classifier fitted on string labels."""
    iris = load_iris()
    features = iris.data[:, [0, 1]]
    # Map the integer targets to their string names.
    string_target = iris.target_names[iris.target]
    classifier = LogisticRegression().fit(features, string_target)

    # The call itself is the check: it must not raise.
    DecisionBoundaryDisplay.from_estimator(
        classifier, features, grid_resolution=5, response_method="predict"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
    """Check that fitting on a dataframe and passing the same dataframe to the
    Display raises no `UserWarning`.

    Non-regression test for:
    * https://github.com/scikit-learn/scikit-learn/issues/23311
    * https://github.com/scikit-learn/scikit-learn/issues/28717
    """
    frame = _convert_container(
        X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
    )
    classifier = LogisticRegression().fit(frame, y)

    with warnings.catch_warnings():
        # Turn any `UserWarning` (e.g. about feature names) into a failure.
        warnings.simplefilter("error", UserWarning)
        DecisionBoundaryDisplay.from_estimator(
            classifier, frame, response_method="predict"
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_binary(pyplot, response_method):
    """Check the effect of `class_of_interest` on the plotted output of
    `predict_proba` and `decision_function` in the binary case.
    """
    iris = load_iris()
    # The first 100 iris samples contain only classes 0 and 1.
    X = iris.data[:100, :2]
    y = iris.target[:100]
    assert_array_equal(np.unique(y), [0, 1])

    estimator = LogisticRegression().fit(X, y)

    def response_for(class_of_interest):
        display = DecisionBoundaryDisplay.from_estimator(
            estimator,
            X,
            response_method=response_method,
            class_of_interest=class_of_interest,
        )
        return display.response

    # `class_of_interest=None` must match `class_of_interest=classes_[1]`.
    default_response = response_for(None)
    class_1_response = response_for(estimator.classes_[1])
    assert_allclose(default_response, class_1_response)

    # Targeting the other class flips the response: 1 - p(y=1|x) for
    # `predict_proba` and the negated score for `decision_function`.
    class_0_response = response_for(estimator.classes_[0])
    if response_method == "predict_proba":
        assert_allclose(default_response, 1 - class_0_response)
    else:
        assert response_method == "decision_function"
        assert_allclose(default_response, -class_0_response)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_multiclass(pyplot, response_method):
    """Check the effect of `class_of_interest` on the plotted output of
    `predict_proba` and `decision_function` in the multiclass case.
    """
    iris = load_iris()
    X = iris.data[:, :2]
    class_of_interest_idx = 2

    def check_plotted_column(fitted, class_of_interest):
        # The display must hold the response column at `class_of_interest_idx`,
        # evaluated on the display's own grid.
        disp = DecisionBoundaryDisplay.from_estimator(
            fitted,
            X,
            response_method=response_method,
            class_of_interest=class_of_interest,
        )
        grid = np.concatenate(
            [disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1
        )
        expected = getattr(fitted, response_method)(grid)[:, class_of_interest_idx]
        assert_allclose(expected.reshape(*disp.response.shape), disp.response)

    # Numerical labels: the class of interest is the label itself.
    estimator = LogisticRegression().fit(X, iris.target)
    check_plotted_column(estimator, class_of_interest_idx)

    # String labels: the class of interest is the matching target name.
    estimator = LogisticRegression().fit(X, iris.target_names[iris.target])
    check_plotted_column(estimator, iris.target_names[class_of_interest_idx])

    # An unknown label must raise. `_get_response_values` already covers this,
    # but it is checked here too through the display's own error message.
    err_msg = "class_of_interest=2 is not a valid label: It should be one of"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(
            estimator,
            X,
            response_method=response_method,
            class_of_interest=class_of_interest_idx,
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_multiclass_plot_max_class(pyplot, response_method):
    """Check that, in the multiclass case, each plotted surface is masked
    everywhere except where its class has the highest response.
    """
    import matplotlib as mpl

    # In matplotlib < v3.5, default value of `pcolormesh(shading)` is 'flat', which
    # results in the last row and column being dropped. Thus older versions produce
    # a 99x99 grid, while newer versions produce a 100x100 grid.
    if parse_version(mpl.__version__) < parse_version("3.5"):
        # The reason must describe the versions actually skipped (< 3.5).
        pytest.skip("`pcolormesh` in Matplotlib < 3.5 gives smaller grid size.")

    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)

    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method="pcolormesh",
        response_method=response_method,
    )

    # The stored response must equal the estimator output on the display grid.
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(clf, response_method)(grid).reshape(*disp.response.shape)
    assert_allclose(response, disp.response)

    # One surface per class.
    assert len(disp.surface_) == len(clf.classes_)
    # Get which class has highest response and check it is plotted
    highest_class = np.argmax(response, axis=2)
    for idx, quadmesh in enumerate(disp.surface_):
        # Note quadmesh mask is True (i.e. masked) when `idx` is NOT the highest class
        assert_array_equal(
            highest_class != idx,
            quadmesh.get_array().mask.reshape(*highest_class.shape),
        )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "multiclass_colors",
    [
        "plasma",
        "Blues",
        ["red", "green", "blue"],
    ],
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour", "pcolormesh"])
def test_multiclass_colors_cmap(pyplot, plot_method, multiclass_colors):
    """Check that the plotted surfaces use the colors implied by each kind of
    `multiclass_colors` input.
    """
    import matplotlib as mpl

    if parse_version(mpl.__version__) < parse_version("3.5"):
        pytest.skip(
            "Matplotlib >= 3.5 is needed for `==` to check equivalence of colormaps"
        )

    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    n_classes = len(clf.classes_)

    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method=plot_method,
        multiclass_colors=multiclass_colors,
    )

    # Build the reference colors, one per class.
    if multiclass_colors == "plasma":
        colors = mpl.pyplot.get_cmap(multiclass_colors, n_classes).colors
    elif multiclass_colors == "Blues":
        resampled = mpl.pyplot.get_cmap(multiclass_colors, n_classes)
        colors = resampled(np.linspace(0, 1, n_classes))
    else:
        colors = [mpl.colors.to_rgba(name) for name in multiclass_colors]

    if plot_method == "contour":
        # A single contour set carries the per-class colors directly.
        assert_allclose(disp.surface_.colors, colors)
    else:
        # One surface per class, each with a white-to-class-color colormap.
        white = (1.0, 1.0, 1.0, 1.0)
        expected_cmaps = [
            mpl.colors.LinearSegmentedColormap.from_list(
                f"colormap_{class_idx}", [white, (r, g, b, 1.0)]
            )
            for class_idx, (r, g, b, _) in enumerate(colors)
        ]
        for idx, quad in enumerate(disp.surface_):
            assert quad.cmap == expected_cmaps[idx]
|
||||
|
||||
|
||||
def test_cmap_and_colors_logic(pyplot):
    """Check that passing `cmap` or `colors` together with `multiclass_colors`
    emits a `UserWarning`.
    """
    iris_X, iris_y = load_iris_2d_scaled()
    classifier = LogisticRegression().fit(iris_X, iris_y)

    scenarios = [
        ({"cmap": "Blues"}, "'cmap' is ignored in favor of 'multiclass_colors'"),
        ({"colors": "blue"}, "'colors' is ignored in favor of 'multiclass_colors'"),
    ]
    for extra_kwargs, warning_msg in scenarios:
        with pytest.warns(UserWarning, match=warning_msg):
            DecisionBoundaryDisplay.from_estimator(
                classifier,
                iris_X,
                multiclass_colors="plasma",
                **extra_kwargs,
            )
|
||||
|
||||
|
||||
def test_subclass_named_constructors_return_type_is_subclass(pyplot):
    """Check that `from_estimator` called on a subclass returns that subclass.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """

    class SubclassOfDisplay(DecisionBoundaryDisplay):
        pass

    classifier = LogisticRegression().fit(X, y)
    display = SubclassOfDisplay.from_estimator(estimator=classifier, X=X)

    assert isinstance(display, SubclassOfDisplay)
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,47 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
|
||||
from sklearn.utils._testing import _convert_container
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "feature_names, array_type, expected_feature_names",
    [
        (None, "array", ["x0", "x1", "x2"]),
        (None, "dataframe", ["a", "b", "c"]),
        (np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
    ],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
    """Check the names returned by `_check_feature_names` for arrays and
    dataframes, with and without explicit `feature_names`.
    """
    raw_data = np.random.randn(10, 3)
    container = _convert_container(
        raw_data, constructor_name=array_type, columns_name=["a", "b", "c"]
    )
    assert _check_feature_names(container, feature_names) == expected_feature_names
|
||||
|
||||
|
||||
def test_check_feature_names_error():
    """Check that duplicated entries in `feature_names` raise a `ValueError`."""
    data = np.random.randn(10, 3)
    duplicated_names = ["a", "b", "c", "a"]
    expected_msg = "feature_names should not contain duplicates."

    with pytest.raises(ValueError, match=expected_msg):
        _check_feature_names(data, duplicated_names)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
    """Check `_get_feature_index` for integer and string feature specifiers."""
    resolved = _get_feature_index(fx, ["a", "b", "c"])
    assert resolved == idx
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "fx, feature_names, err_msg",
    [
        ("a", None, "Cannot plot partial dependence for feature 'a'"),
        ("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
    ],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
    """Check the `ValueError` raised by `_get_feature_index` for a string feature.

    Covers the cases where `feature_names` is None and where the requested
    name is absent from `feature_names`.
    """
    raises_ctx = pytest.raises(ValueError, match=err_msg)
    with raises_ctx:
        _get_feature_index(fx, feature_names)
|
||||
@@ -0,0 +1,540 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from joblib import parallel_backend
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from sklearn.compose import ColumnTransformer
|
||||
from sklearn.datasets import (
|
||||
load_diabetes,
|
||||
load_iris,
|
||||
make_classification,
|
||||
make_regression,
|
||||
)
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
||||
from sklearn.impute import SimpleImputer
|
||||
from sklearn.inspection import permutation_importance
|
||||
from sklearn.linear_model import LinearRegression, LogisticRegression
|
||||
from sklearn.metrics import (
|
||||
get_scorer,
|
||||
mean_squared_error,
|
||||
r2_score,
|
||||
)
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
|
||||
from sklearn.utils._testing import _convert_container
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
    n_jobs, max_samples, sample_weight
):
    """Check that a feature almost equal to the target gets the top importance.

    A slightly noisy copy of `y` is appended as the last column of `X`; its
    mean permutation importance must exceed that of every other column.
    """
    rng = np.random.RandomState(42)
    n_repeats = 5

    X, y = load_diabetes(return_X_y=True)
    noisy_target = y + rng.normal(scale=0.001, size=y.shape[0])
    X = np.hstack([X, noisy_target.reshape(-1, 1)])

    # The "ones" marker is expanded into an explicit vector of unit weights.
    if sample_weight == "ones":
        weights = np.ones_like(y)
    else:
        weights = sample_weight

    regressor = RandomForestRegressor(n_estimators=10, random_state=42)
    regressor.fit(X, y)

    result = permutation_importance(
        regressor,
        X,
        y,
        sample_weight=weights,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert result.importances.shape == (X.shape[1], n_repeats)

    # The last column is the one correlated with y: it must dominate.
    mean_importances = result.importances_mean
    assert np.all(mean_importances[:-1] < mean_importances[-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
    n_jobs, max_samples
):
    """Check, on a dataframe, that a feature almost equal to `y` ranks first.

    A slightly noisy copy of the iris target is added as the last dataframe
    column; its mean permutation importance must exceed all the others.
    """
    pd = pytest.importorskip("pandas")

    rng = np.random.RandomState(42)
    n_repeats = 5

    iris = load_iris()
    y = iris.target
    noisy_target = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)

    # Build the dataframe and append the correlated feature as the last column.
    X = pd.DataFrame(iris.data, columns=iris.feature_names)
    X["correlated_feature"] = noisy_target

    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    forest.fit(X, y)

    result = permutation_importance(
        forest,
        X,
        y,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert result.importances.shape == (X.shape[1], n_repeats)

    # The last column is the one correlated with y: it must dominate.
    mean_importances = result.importances_mean
    assert np.all(mean_importances[:-1] < mean_importances[-1])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
    """Check that permutation importance is robust to a noisy numerical feature.

    Impurity-based importances of the forest rank the noise column above the
    informative binary indicators; permutation importances computed on a
    held-out test set must rank the informative features first instead.
    """
    rng = np.random.RandomState(seed)
    n_repeats = 5
    n_samples = 1000
    n_classes = 5
    n_informative_features = 2
    n_noise_features = 1
    n_features = n_informative_features + n_noise_features

    # Multiclass target plus one binary indicator per "explained" class: these
    # indicators predict some classes exactly and leave the others unexplained.
    classes = np.arange(n_classes)
    y = rng.choice(classes, size=n_samples)
    indicators = y[:, np.newaxis] == classes[:n_informative_features]
    X = indicators.astype(np.float32)

    # Some target classes must remain without an indicator feature.
    assert n_informative_features < n_classes

    # Append the continuous (high cardinality) noise column(s) that the forest
    # can use to overfit the training data.
    noise = rng.randn(n_samples, n_noise_features)
    X = np.hstack([X, noise])
    assert X.shape == (n_samples, n_features)

    # Hold out half of the data; the test set has to be large enough for the
    # importance measurements to be stable.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=rng
    )
    clf = RandomForestClassifier(n_estimators=5, random_state=rng)
    clf.fit(X_train, y_train)

    # Impurity-based importances are misleading here: the noisy feature is
    # used in many splits and ends up looking like the most important one.
    informative_tree_importances, noisy_tree_importances = np.split(
        clf.feature_importances_, [n_informative_features]
    )
    assert informative_tree_importances.max() < noisy_tree_importances.min()

    # Permutation-based importances must not suffer from this problem.
    r = permutation_importance(
        clf,
        X_test,
        y_test,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )

    assert r.importances.shape == (X.shape[1], n_repeats)

    # Separate informative and noisy features.
    informative_importances, noisy_importances = np.split(
        r.importances_mean, [n_informative_features]
    )

    # As max_depth is not set and not every class has an indicator, the forest
    # makes some (overfitting) splits on the noise: its importance is non-zero
    # but stays small.
    assert np.abs(noisy_importances).max() > 1e-7
    assert noisy_importances.max() < 0.05

    # The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately a bit more than 0.2 of accuracy, so both must
    # clearly outrank the noise.
    assert informative_importances.min() > 0.15
|
||||
|
||||
|
||||
def test_permutation_importance_mixed_types():
    """Check permutation importance on data with a missing value.

    The last column is correlated with `y` and must get the highest mean
    importance, for two different random states whose raw importances differ.
    """
    n_repeats = 4

    # Last column is correlated with y; the first one contains a NaN.
    X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
    y = np.array([0, 1, 0, 1])

    clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)

    def check_shape_and_ranking(res):
        # The correlated feature is the last column and should dominate.
        assert res.importances.shape == (X.shape[1], n_repeats)
        assert np.all(res.importances_mean[-1] > res.importances_mean[:-1])

    first = permutation_importance(
        clf, X, y, n_repeats=n_repeats, random_state=np.random.RandomState(42)
    )
    check_shape_and_ranking(first)

    # Another random state must give different raw importances...
    second = permutation_importance(
        clf, X, y, n_repeats=n_repeats, random_state=np.random.RandomState(0)
    )
    assert second.importances.shape == (X.shape[1], n_repeats)
    assert not np.allclose(first.importances, second.importances)

    # ... while keeping the same ranking of the correlated feature.
    check_shape_and_ranking(second)
|
||||
|
||||
|
||||
def test_permutation_importance_mixed_types_pandas():
    """Check permutation importance on a dataframe mixing floats and strings.

    The string column (last one) is correlated with `y` and must get the
    highest mean importance.
    """
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(42)
    n_repeats = 5

    # Last column is correlated with y
    X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
    y = np.array([0, 1, 0, 1])

    # Numerical column: impute then scale; string column: one-hot encode.
    transformers = [
        ("num", make_pipeline(SimpleImputer(), StandardScaler()), ["col1"]),
        ("cat", OneHotEncoder(), ["col2"]),
    ]
    clf = make_pipeline(
        ColumnTransformer(transformers), LogisticRegression(solver="lbfgs")
    )
    clf.fit(X, y)

    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)

    assert result.importances.shape == (X.shape[1], n_repeats)
    # The correlated feature is the last column and should dominate.
    mean_importances = result.importances_mean
    assert np.all(mean_importances[:-1] < mean_importances[-1])
|
||||
|
||||
|
||||
def test_permutation_importance_linear_regresssion():
    """Compare permutation importances of a linear model to `2 * coef_ ** 2`.

    Features and target are standardized and the scoring is the negative mean
    squared error; the comparison uses loose tolerances.
    """
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    X, y = scale(X), scale(y)

    lr = LinearRegression().fit(X, y)

    # this relationship can be computed in closed form
    expected_importances = 2 * np.square(lr.coef_)

    results = permutation_importance(
        lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
    )
    assert_allclose(
        expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
    """Check that sequential and parallel calls return the same importances.

    Only the sequential call receives `max_samples`, so this also checks that
    `max_samples` equal to the number of samples is equivalent to 1.0.
    """
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(X, y)

    shared_kwargs = dict(n_repeats=5, random_state=0)

    importance_sequential = permutation_importance(
        lr, X, y, n_jobs=1, max_samples=max_samples, **shared_kwargs
    )
    reference = importance_sequential["importances"]

    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    assert reference.max() - reference.min() > 0.3

    # Then check that parallelism does not change the results, both with the
    # default process-based backend ('loky' or 'multiprocessing' depending on
    # the joblib version) and with the thread-based backend.

    # process-based parallelism (by default):
    importance_processes = permutation_importance(
        lr, X, y, n_jobs=2, **shared_kwargs
    )
    assert_allclose(importance_processes["importances"], reference)

    # thread-based parallelism:
    with parallel_backend("threading"):
        importance_threading = permutation_importance(
            lr, X, y, n_jobs=2, **shared_kwargs
        )
    assert_allclose(importance_threading["importances"], reference)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    """Check that importances match between a NumPy array and a dataframe.

    The dataframe holds the same data as the array, with the extra column
    stored as a pandas categorical and with a string index.
    """
    pd = pytest.importorskip("pandas")

    X, y = make_regression(n_samples=100, n_features=5, random_state=0)
    X_df = pd.DataFrame(X)

    # Add a categorical feature that is statistically linked to y:
    binner = KBinsDiscretizer(
        n_bins=3,
        encode="ordinal",
        quantile_method="averaged_inverted_cdf",
    )
    binned_target = binner.fit_transform(y.reshape(-1, 1))

    # Concatenate the extra column to the numpy array: the result is expected
    # to be a floating point array.
    X = np.hstack([X, binned_target])
    assert X.dtype.kind == "f"

    # Insert the extra column into the dataframe as a non-numpy-native dtype:
    categorical_column = pd.Categorical(binned_target.ravel())
    new_col_idx = len(X_df.columns)
    X_df[new_col_idx] = categorical_column
    assert X_df[new_col_idx].dtype == categorical_column.dtype

    # Stitch an arbitrary (string) index to the dataframe:
    X_df.index = np.arange(len(X_df)).astype(str)

    rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
    rf.fit(X, y)

    n_repeats = 3
    shared_kwargs = dict(
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    importance_array = permutation_importance(rf, X, y, **shared_kwargs)
    array_importances = importance_array["importances"]

    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    assert array_importances.max() - array_importances.min() > 0.3

    # Now check that importances computed on the dataframe match the values
    # of those computed on the array with the same data.
    importance_dataframe = permutation_importance(rf, X_df, y, **shared_kwargs)
    assert_allclose(array_importances, importance_dataframe["importances"])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
    """Smoke test: permutation importance with `n_jobs=2` on data above 1e6 bytes.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/15810
    """
    n_samples, n_features = int(5e4), 4
    n_repeats = 5

    X, y = make_classification(
        n_samples=n_samples, n_features=n_features, random_state=0
    )
    assert X.nbytes > 1e6  # trigger joblib memmaping

    X = _convert_container(X, input_type)
    dummy = DummyClassifier(strategy="prior").fit(X, y)

    # Actual smoke test: should not raise any error:
    r = permutation_importance(dummy, X, y, n_repeats=n_repeats, n_jobs=2)

    # Auxiliary check: DummyClassifier is feature independent, so permuting a
    # feature should not change the predictions and all importances are zero.
    assert_allclose(np.zeros((n_features, n_repeats)), r.importances)
|
||||
|
||||
|
||||
def test_permutation_importance_sample_weight():
    """Check the effect of `sample_weight` on the ratio of feature importances.

    The target is a linear combination of two features: on the first half of
    the samples feature 1 weighs twice as much as feature 2, and the other way
    around on the second half.
    """
    rng = np.random.RandomState(1)
    n_samples = 1000
    n_features = 2
    n_half_samples = n_samples // 2
    x = rng.normal(0.0, 0.001, (n_samples, n_features))
    first_half, second_half = x[:n_half_samples], x[n_half_samples:]
    y = np.concatenate(
        [
            2 * first_half[:, 0] + first_half[:, 1],
            second_half[:, 0] + 2 * second_half[:, 1],
        ]
    )

    # Linear regression without intercept fitted on this data.
    lr = LinearRegression(fit_intercept=False)
    lr.fit(x, y)

    def importance_ratio(**extra_kwargs):
        # Ratio between the mean importances of feature 1 and feature 2, using
        # the mean absolute error as the loss function.
        pi = permutation_importance(
            lr,
            x,
            y,
            random_state=1,
            scoring="neg_mean_absolute_error",
            n_repeats=200,
            **extra_kwargs,
        )
        return pi.importances_mean[0] / pi.importances_mean[1]

    # When all samples are weighted equally, the ratio of the two feature
    # importances should be equal to 1 on expectation.
    x1_x2_imp_ratio_w_none = importance_ratio()
    assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)

    # Passing a vector of ones as sample_weight should give the same result
    # as sample_weight=None.
    x1_x2_imp_ratio_w_ones = importance_ratio(sample_weight=np.ones(n_samples))
    assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)

    # When the weights of the first half of the samples are far larger than
    # those of the second half, the ratio of the two feature importances
    # should be equal to 2 on expectation.
    w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)])
    lr.fit(x, y, w)
    x1_x2_imp_ratio_w = importance_ratio(sample_weight=w)
    assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)
|
||||
|
||||
|
||||
def test_permutation_importance_no_weights_scoring_function():
    """Check the behavior with a scorer that does not accept `sample_weight`.

    No `TypeError` may be raised when `sample_weight` is left to None, while a
    `TypeError` is expected when weights are passed.
    """

    # Scorer whose signature has no sample_weight parameter.
    def constant_scorer(estimator, X, y):
        return 1

    # Minimal data and estimator for the permutation test.
    x = np.array([[1, 2], [3, 4]])
    y = np.array([1, 2])
    w = np.array([1, 1])
    lr = LinearRegression()
    lr.fit(x, y)

    call_kwargs = dict(random_state=1, scoring=constant_scorer, n_repeats=1)

    # Without sample_weight, the call must go through.
    try:
        permutation_importance(lr, x, y, **call_kwargs)
    except TypeError:
        pytest.fail(
            "permutation_test raised an error when using a scorer "
            "function that does not accept sample_weight even though "
            "sample_weight was None"
        )

    # With sample_weight, a TypeError is expected.
    with pytest.raises(TypeError):
        permutation_importance(lr, x, y, sample_weight=w, **call_kwargs)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "list_single_scorer, multi_scorer",
    [
        (["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
        (
            ["r2", "neg_mean_squared_error"],
            {
                "r2": get_scorer("r2"),
                "neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
            },
        ),
        (
            ["r2", "neg_mean_squared_error"],
            lambda estimator, X, y: {
                "r2": r2_score(y, estimator.predict(X)),
                "neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
            },
        ),
    ],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
    """Check permutation importance when `scoring` holds several metrics.

    The multi-metric result must have one entry per metric name, and each
    entry must match the importances from a single-metric call for that name.
    """
    x, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(x, y)

    shared_kwargs = dict(random_state=1, n_repeats=2)

    multi_importance = permutation_importance(
        lr, x, y, scoring=multi_scorer, **shared_kwargs
    )
    assert set(multi_importance) == set(list_single_scorer)

    for scorer_name in list_single_scorer:
        single_result = permutation_importance(
            lr, x, y, scoring=scorer_name, **shared_kwargs
        )
        assert_allclose(
            multi_importance[scorer_name].importances, single_result.importances
        )
|
||||
|
||||
|
||||
def test_permutation_importance_max_samples_error():
    """Check the error raised when `max_samples` exceeds the number of samples.

    The data has 4 samples and `max_samples=5` is requested.
    """
    X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
    y = np.array([0, 1, 0, 1])

    clf = LogisticRegression().fit(X, y)

    expected_msg = r"max_samples must be <= n_samples"
    with pytest.raises(ValueError, match=expected_msg):
        permutation_importance(clf, X, y, max_samples=5)
|
||||
Reference in New Issue
Block a user