Add README

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
"""Tools for model inspection."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from ._partial_dependence import partial_dependence
from ._permutation_importance import permutation_importance
from ._plot.decision_boundary import DecisionBoundaryDisplay
from ._plot.partial_dependence import PartialDependenceDisplay
# Names re-exported as the public API of the inspection package.
__all__ = [
    "DecisionBoundaryDisplay",
    "PartialDependenceDisplay",
    "partial_dependence",
    "permutation_importance",
]

View File

@@ -0,0 +1,775 @@
"""Partial dependence plots for regression and classification models."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from collections.abc import Iterable
import numpy as np
from scipy import sparse
from scipy.stats.mstats import mquantiles
from ..base import is_classifier, is_regressor
from ..ensemble import RandomForestRegressor
from ..ensemble._gb import BaseGradientBoosting
from ..ensemble._hist_gradient_boosting.gradient_boosting import (
BaseHistGradientBoosting,
)
from ..tree import DecisionTreeRegressor
from ..utils import Bunch, _safe_indexing, check_array
from ..utils._indexing import _determine_key_type, _get_column_indices, _safe_assign
from ..utils._optional_dependencies import check_matplotlib_support # noqa: F401
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
StrOptions,
validate_params,
)
from ..utils._response import _get_response_values
from ..utils.extmath import cartesian
from ..utils.validation import _check_sample_weight, check_is_fitted
from ._pd_utils import _check_feature_names, _get_feature_index
# Public names exported by this module.
__all__ = [
    "partial_dependence",
]
def _grid_from_X(X, percentiles, is_categorical, grid_resolution, custom_values):
    """Generate a grid of points based on the percentiles of X.

    The grid is a cartesian product between the columns of ``values``. The
    ith column of ``values`` consists in ``grid_resolution`` equally-spaced
    points between the percentiles of the ith column of X.

    If ``grid_resolution`` is bigger than the number of unique values in the
    i-th column of X, or if the feature is a categorical feature (by
    inspecting ``is_categorical``), then those unique values will be used
    instead.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_target_features)
        The data.

    percentiles : tuple of float
        The percentiles which are used to construct the extreme values of
        the grid. Must be in [0, 1].

    is_categorical : list of bool
        For each feature, tells whether it is categorical or not. If a feature
        is categorical, then the values used will be the unique ones
        (i.e. categories) instead of the percentiles.

    grid_resolution : int
        The number of equally spaced points to be placed on the grid for each
        feature.

    custom_values : dict
        Mapping from column index of X to an array-like of values where
        the partial dependence should be calculated for that feature.

    Returns
    -------
    grid : ndarray of shape (n_points, n_target_features)
        A value for each feature at each point in the grid. ``n_points`` is
        always ``<= grid_resolution ** X.shape[1]``.

    values : list of 1d ndarrays
        The values with which the grid has been created. If ``j`` is not in
        ``custom_values``, the size of ``values[j]`` is either
        ``grid_resolution`` or the number of unique values in ``X[:, j]``,
        whichever is smaller. If ``j`` is in ``custom_values``, then
        ``values[j]`` has the length of ``custom_values[j]``.

    Raises
    ------
    ValueError
        If ``percentiles`` is malformed, ``grid_resolution <= 1``, a custom
        grid is not one-dimensional, or a column mixes `np.nan` with `str`
        categories.
    """
    if not isinstance(percentiles, Iterable) or len(percentiles) != 2:
        raise ValueError("'percentiles' must be a sequence of 2 elements.")
    if not all(0 <= x <= 1 for x in percentiles):
        raise ValueError("'percentiles' values must be in [0, 1].")
    if percentiles[0] >= percentiles[1]:
        raise ValueError("percentiles[0] must be strictly less than percentiles[1].")
    if grid_resolution <= 1:
        raise ValueError("'grid_resolution' must be strictly greater than 1.")

    def _convert_custom_values(values):
        # Convert custom types such that object types are always used for string arrays
        dtype = object if any(isinstance(v, str) for v in values) else None
        return np.asarray(values, dtype=dtype)

    custom_values = {k: _convert_custom_values(v) for k, v in custom_values.items()}
    if any(v.ndim != 1 for v in custom_values.values()):
        error_string = ", ".join(
            f"Feature {k}: {v.ndim} dimensions"
            for k, v in custom_values.items()
            if v.ndim != 1
        )
        raise ValueError(
            "The custom grid for some features is not a one-dimensional array. "
            f"{error_string}"
        )

    values = []
    # TODO: we should handle missing values (i.e. `np.nan`) specifically and store them
    # in a different Bunch attribute.
    for feature, is_cat in enumerate(is_categorical):
        if feature in custom_values:
            # Use values in the custom range
            axis = custom_values[feature]
        else:
            try:
                uniques = np.unique(_safe_indexing(X, feature, axis=1))
            except TypeError as exc:
                # `np.unique` will fail in the presence of `np.nan` and `str` categories
                # due to sorting. Temporary, we reraise an error explaining the problem.
                raise ValueError(
                    f"The column #{feature} contains mixed data types. Finding unique "
                    "categories fail due to sorting. It usually means that the column "
                    "contains `np.nan` values together with `str` categories. Such use "
                    "case is not yet supported in scikit-learn."
                ) from exc
            if is_cat or uniques.shape[0] < grid_resolution:
                # Use the unique values either because:
                # - feature has low resolution use unique values
                # - feature is categorical
                axis = uniques
            else:
                # create axis based on percentiles and grid resolution
                emp_percentiles = mquantiles(
                    _safe_indexing(X, feature, axis=1), prob=percentiles, axis=0
                )
                if np.allclose(emp_percentiles[0], emp_percentiles[1]):
                    raise ValueError(
                        "percentiles are too close to each other, "
                        "unable to build the grid. Please choose percentiles "
                        "that are further apart."
                    )
                axis = np.linspace(
                    emp_percentiles[0],
                    emp_percentiles[1],
                    num=grid_resolution,
                    endpoint=True,
                )
        values.append(axis)

    return cartesian(values), values
def _partial_dependence_recursion(est, grid, features):
"""Calculate partial dependence via the recursion method.
The recursion method is in particular enabled for tree-based estimators.
For each `grid` value, a weighted tree traversal is performed: if a split node
involves an input feature of interest, the corresponding left or right branch
is followed; otherwise both branches are followed, each branch being weighted
by the fraction of training samples that entered that branch. Finally, the
partial dependence is given by a weighted average of all the visited leaves
values.
This method is more efficient in terms of speed than the `'brute'` method
(:func:`~sklearn.inspection._partial_dependence._partial_dependence_brute`).
However, here, the partial dependence computation is done explicitly with the
`X` used during training of `est`.
Parameters
----------
est : BaseEstimator
A fitted estimator object implementing :term:`predict` or
:term:`decision_function`. Multioutput-multiclass classifiers are not
supported. Note that `'recursion'` is only supported for some tree-based
estimators (namely
:class:`~sklearn.ensemble.GradientBoostingClassifier`,
:class:`~sklearn.ensemble.GradientBoostingRegressor`,
:class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
:class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
:class:`~sklearn.tree.DecisionTreeRegressor`,
:class:`~sklearn.ensemble.RandomForestRegressor`,
).
grid : array-like of shape (n_points, n_target_features)
The grid of feature values for which the partial dependence is calculated.
Note that `n_points` is the number of points in the grid and `n_target_features`
is the number of features you are doing partial dependence at.
features : array-like of {int, str}
The feature (e.g. `[0]`) or pair of interacting features
(e.g. `[(0, 1)]`) for which the partial dependency should be computed.
Returns
-------
averaged_predictions : array-like of shape (n_targets, n_points)
The averaged predictions for the given `grid` of features values.
Note that `n_targets` is the number of targets (e.g. 1 for binary
classification, `n_tasks` for multi-output regression, and `n_classes` for
multiclass classification) and `n_points` is the number of points in the `grid`.
"""
averaged_predictions = est._compute_partial_dependence_recursion(grid, features)
if averaged_predictions.ndim == 1:
# reshape to (1, n_points) for consistency with
# _partial_dependence_brute
averaged_predictions = averaged_predictions.reshape(1, -1)
return averaged_predictions
def _partial_dependence_brute(
    est, grid, features, X, response_method, sample_weight=None
):
    """Calculate partial dependence via the brute force method.

    The brute method explicitly averages the predictions of an estimator over a
    grid of feature values.

    For each `grid` value, all the samples from `X` have their variables of
    interest replaced by that specific `grid` value. The predictions are then made
    and averaged across the samples.

    This method is slower than the `'recursion'`
    (:func:`~sklearn.inspection._partial_dependence._partial_dependence_recursion`)
    version for estimators with this second option. However, with the `'brute'`
    force method, the average will be done with the given `X` and not the `X`
    used during training, as it is done in the `'recursion'` version. Therefore
    the average can always accept `sample_weight` (even when the estimator was
    fitted without).

    Parameters
    ----------
    est : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    grid : array-like of shape (n_points, n_target_features)
        The grid of feature values for which the partial dependence is calculated.
        Note that `n_points` is the number of points in the grid and `n_target_features`
        is the number of features you are doing partial dependence at.

    features : array-like of {int, str}
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    X : array-like of shape (n_samples, n_features)
        `X` is used to generate values for the complement features. That is, for
        each value in `grid`, the method will average the prediction of each
        sample from `X` having that grid value for `features`.

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. Note that
        `sample_weight` does not change the individual predictions.

    Returns
    -------
    averaged_predictions : array-like of shape (n_targets, n_points)
        The averaged predictions for the given `grid` of features values.
        Note that `n_targets` is the number of targets (e.g. 1 for binary
        classification, `n_tasks` for multi-output regression, and `n_classes` for
        multiclass classification) and `n_points` is the number of points in the `grid`.

    predictions : array-like
        The predictions for the given `grid` of features values over the samples
        from `X`. For non-multioutput regression and binary classification the
        shape is `(n_instances, n_points)` and for multi-output regression and
        multiclass classification the shape is `(n_targets, n_instances, n_points)`,
        where `n_targets` is the number of targets (`n_tasks` for multi-output
        regression, and `n_classes` for multiclass classification), `n_instances`
        is the number of instances in `X`, and `n_points` is the number of points
        in the `grid`.
    """
    predictions = []
    averaged_predictions = []

    # Resolve 'auto': regressors use `predict`; classifiers try
    # `predict_proba` first and fall back to `decision_function`.
    if response_method == "auto":
        response_method = (
            "predict" if is_regressor(est) else ["predict_proba", "decision_function"]
        )

    # Work on a copy so the caller's X is never mutated by _safe_assign below.
    X_eval = X.copy()
    for new_values in grid:
        for i, variable in enumerate(features):
            _safe_assign(X_eval, new_values[i], column_indexer=variable)

        # Note: predictions is of shape
        # (n_points,) for non-multioutput regressors
        # (n_points, n_tasks) for multioutput regressors
        # (n_points, 1) for the regressors in cross_decomposition (I think)
        # (n_points, 1) for binary classification (positive class already selected)
        # (n_points, n_classes) for multiclass classification
        pred, _ = _get_response_values(est, X_eval, response_method=response_method)

        predictions.append(pred)
        # average over samples
        averaged_predictions.append(np.average(pred, axis=0, weights=sample_weight))

    n_samples = X.shape[0]

    # reshape to (n_targets, n_instances, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    predictions = np.array(predictions).T
    if is_regressor(est) and predictions.ndim == 2:
        # non-multioutput regression, shape is (n_instances, n_points,)
        predictions = predictions.reshape(n_samples, -1)
    elif is_classifier(est) and predictions.shape[0] == 2:
        # Binary classification, shape is (2, n_instances, n_points).
        # we output the effect of **positive** class
        predictions = predictions[1]
        predictions = predictions.reshape(n_samples, -1)

    # reshape averaged_predictions to (n_targets, n_points) where n_targets is:
    # - 1 for non-multioutput regression and binary classification (shape is
    #   already correct in those cases)
    # - n_tasks for multi-output regression
    # - n_classes for multiclass classification.
    averaged_predictions = np.array(averaged_predictions).T
    if averaged_predictions.ndim == 1:
        # reshape to (1, n_points) for consistency with
        # _partial_dependence_recursion
        averaged_predictions = averaged_predictions.reshape(1, -1)

    return averaged_predictions, predictions
@validate_params(
    {
        "estimator": [
            HasMethods(["fit", "predict"]),
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
        ],
        "X": ["array-like", "sparse matrix"],
        "features": ["array-like", Integral, str],
        "sample_weight": ["array-like", None],
        "categorical_features": ["array-like", None],
        "feature_names": ["array-like", None],
        "response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
        "percentiles": [tuple],
        "grid_resolution": [Interval(Integral, 1, None, closed="left")],
        "method": [StrOptions({"auto", "recursion", "brute"})],
        "kind": [StrOptions({"average", "individual", "both"})],
        "custom_values": [dict, None],
    },
    prefer_skip_nested_validation=True,
)
def partial_dependence(
    estimator,
    X,
    features,
    *,
    sample_weight=None,
    categorical_features=None,
    feature_names=None,
    response_method="auto",
    percentiles=(0.05, 0.95),
    grid_resolution=100,
    custom_values=None,
    method="auto",
    kind="average",
):
    """Partial dependence of ``features``.

    Partial dependence of a feature (or a set of features) corresponds to
    the average response of an estimator for each possible value of the
    feature.

    Read more in
    :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
    and the :ref:`User Guide <partial_dependence>`.

    .. warning::

        For :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, the
        `'recursion'` method (used by default) will not account for the `init`
        predictor of the boosting process. In practice, this will produce
        the same values as `'brute'` up to a constant offset in the target
        response, provided that `init` is a constant estimator (which is the
        default). However, if `init` is not a constant estimator, the
        partial dependence values are incorrect for `'recursion'` because the
        offset will be sample-dependent. It is preferable to use the `'brute'`
        method. Note that this only applies to
        :class:`~sklearn.ensemble.GradientBoostingClassifier` and
        :class:`~sklearn.ensemble.GradientBoostingRegressor`, not to
        :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and
        :class:`~sklearn.ensemble.HistGradientBoostingRegressor`.

    Parameters
    ----------
    estimator : BaseEstimator
        A fitted estimator object implementing :term:`predict`,
        :term:`predict_proba`, or :term:`decision_function`.
        Multioutput-multiclass classifiers are not supported.

    X : {array-like, sparse matrix or dataframe} of shape (n_samples, n_features)
        ``X`` is used to generate a grid of values for the target
        ``features`` (where the partial dependence will be evaluated), and
        also to generate values for the complement features when the
        `method` is 'brute'.

    features : array-like of {int, str, bool} or int or str
        The feature (e.g. `[0]`) or pair of interacting features
        (e.g. `[(0, 1)]`) for which the partial dependency should be computed.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights are used to calculate weighted means when averaging the
        model output. If `None`, then samples are equally weighted. If
        `sample_weight` is not `None`, then `method` will be set to `'brute'`.
        Note that `sample_weight` is ignored for `kind='individual'`.

        .. versionadded:: 1.3

    categorical_features : array-like of shape (n_features,) or shape \
            (n_categorical_features,), dtype={bool, int, str}, default=None
        Indicates the categorical features.

        - `None`: no feature will be considered categorical;
        - boolean array-like: boolean mask of shape `(n_features,)`
          indicating which features are categorical. Thus, this array has
          the same shape has `X.shape[1]`;
        - integer or string array-like: integer indices or strings
          indicating categorical features.

        .. versionadded:: 1.2

    feature_names : array-like of shape (n_features,), dtype=str, default=None
        Name of each feature; `feature_names[i]` holds the name of the feature
        with index `i`.
        By default, the name of the feature corresponds to their numerical
        index for NumPy array and their column name for pandas dataframe.

        .. versionadded:: 1.2

    response_method : {'auto', 'predict_proba', 'decision_function'}, \
            default='auto'
        Specifies whether to use :term:`predict_proba` or
        :term:`decision_function` as the target response. For regressors
        this parameter is ignored and the response is always the output of
        :term:`predict`. By default, :term:`predict_proba` is tried first
        and we revert to :term:`decision_function` if it doesn't exist. If
        ``method`` is 'recursion', the response is always the output of
        :term:`decision_function`.

    percentiles : tuple of float, default=(0.05, 0.95)
        The lower and upper percentile used to create the extreme values
        for the grid. Must be in [0, 1].
        This parameter is overridden by `custom_values` if that parameter is set.

    grid_resolution : int, default=100
        The number of equally spaced points on the grid, for each target
        feature.
        This parameter is overridden by `custom_values` if that parameter is set.

    custom_values : dict
        A dictionary mapping the index of an element of `features` to an array
        of values where the partial dependence should be calculated
        for that feature. Setting a range of values for a feature overrides
        `grid_resolution` and `percentiles`.

        See :ref:`how to use partial_dependence
        <plt_partial_dependence_custom_values>` for an example of how this parameter can
        be used.

        .. versionadded:: 1.7

    method : {'auto', 'recursion', 'brute'}, default='auto'
        The method used to calculate the averaged predictions:

        - `'recursion'` is only supported for some tree-based estimators
          (namely
          :class:`~sklearn.ensemble.GradientBoostingClassifier`,
          :class:`~sklearn.ensemble.GradientBoostingRegressor`,
          :class:`~sklearn.ensemble.HistGradientBoostingClassifier`,
          :class:`~sklearn.ensemble.HistGradientBoostingRegressor`,
          :class:`~sklearn.tree.DecisionTreeRegressor`,
          :class:`~sklearn.ensemble.RandomForestRegressor`,
          ) when `kind='average'`.
          This is more efficient in terms of speed.
          With this method, the target response of a
          classifier is always the decision function, not the predicted
          probabilities. Since the `'recursion'` method implicitly computes
          the average of the Individual Conditional Expectation (ICE) by
          design, it is not compatible with ICE and thus `kind` must be
          `'average'`.

        - `'brute'` is supported for any estimator, but is more
          computationally intensive.

        - `'auto'`: the `'recursion'` is used for estimators that support it,
          and `'brute'` is used otherwise. If `sample_weight` is not `None`,
          then `'brute'` is used regardless of the estimator.

        Please see :ref:`this note <pdp_method_differences>` for
        differences between the `'brute'` and `'recursion'` method.

    kind : {'average', 'individual', 'both'}, default='average'
        Whether to return the partial dependence averaged across all the
        samples in the dataset or one value per sample or both.
        See Returns below.

        Note that the fast `method='recursion'` option is only available for
        `kind='average'` and `sample_weights=None`. Computing individual
        dependencies and doing weighted averages requires using the slower
        `method='brute'`.

        .. versionadded:: 0.24

    Returns
    -------
    predictions : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.

        individual : ndarray of shape (n_outputs, n_instances, \
                len(values[0]), len(values[1]), ...)
            The predictions for all the points in the grid for all
            samples in X. This is also known as Individual
            Conditional Expectation (ICE).
            Only available when `kind='individual'` or `kind='both'`.

        average : ndarray of shape (n_outputs, len(values[0]), \
                len(values[1]), ...)
            The predictions for all the points in the grid, averaged
            over all samples in X (or over the training data if
            `method` is 'recursion').
            Only available when `kind='average'` or `kind='both'`.

        grid_values : seq of 1d ndarrays
            The values with which the grid has been created. The generated
            grid is a cartesian product of the arrays in `grid_values` where
            `len(grid_values) == len(features)`. The size of each array
            `grid_values[j]` is either `grid_resolution`, or the number of
            unique values in `X[:, j]`, whichever is smaller.

            .. versionadded:: 1.3

        `n_outputs` corresponds to the number of classes in a multi-class
        setting, or to the number of tasks for multi-output regression.
        For classical regression and binary classification `n_outputs==1`.
        `n_values_feature_j` corresponds to the size `grid_values[j]`.

    See Also
    --------
    PartialDependenceDisplay.from_estimator : Plot Partial Dependence.
    PartialDependenceDisplay : Partial Dependence visualization.

    Examples
    --------
    >>> X = [[0, 0, 2], [1, 0, 0]]
    >>> y = [0, 1]
    >>> from sklearn.ensemble import GradientBoostingClassifier
    >>> gb = GradientBoostingClassifier(random_state=0).fit(X, y)
    >>> partial_dependence(gb, features=[0], X=X, percentiles=(0, 1),
    ...                    grid_resolution=2)  # doctest: +SKIP
    (array([[-4.52, 4.52]]), [array([ 0., 1.])])
    """
    check_is_fitted(estimator)

    if not (is_classifier(estimator) or is_regressor(estimator)):
        raise ValueError("'estimator' must be a fitted regressor or classifier.")

    if is_classifier(estimator) and isinstance(estimator.classes_[0], np.ndarray):
        raise ValueError("Multiclass-multioutput estimators are not supported")

    # Use check_array only on lists and other non-array-likes / sparse. Do not
    # convert DataFrame into a NumPy array.
    if not (hasattr(X, "__array__") or sparse.issparse(X)):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=object)

    if is_regressor(estimator) and response_method != "auto":
        raise ValueError(
            "The response_method parameter is ignored for regressors and "
            "must be 'auto'."
        )

    if kind != "average":
        if method == "recursion":
            raise ValueError(
                "The 'recursion' method only applies when 'kind' is set to 'average'"
            )
        # ICE requires per-sample predictions, which only 'brute' produces.
        method = "brute"

    if method == "recursion" and sample_weight is not None:
        raise ValueError(
            "The 'recursion' method can only be applied when sample_weight is None."
        )

    if method == "auto":
        if sample_weight is not None:
            # Weighted averaging is only possible with explicit predictions.
            method = "brute"
        elif isinstance(estimator, BaseGradientBoosting) and estimator.init is None:
            method = "recursion"
        elif isinstance(
            estimator,
            (BaseHistGradientBoosting, DecisionTreeRegressor, RandomForestRegressor),
        ):
            method = "recursion"
        else:
            method = "brute"

    if method == "recursion":
        if not isinstance(
            estimator,
            (
                BaseGradientBoosting,
                BaseHistGradientBoosting,
                DecisionTreeRegressor,
                RandomForestRegressor,
            ),
        ):
            # Keep this listing in sync with the isinstance check above.
            supported_classes_recursion = (
                "GradientBoostingClassifier",
                "GradientBoostingRegressor",
                "HistGradientBoostingClassifier",
                "HistGradientBoostingRegressor",
                "DecisionTreeRegressor",
                "RandomForestRegressor",
            )
            raise ValueError(
                "Only the following estimators support the 'recursion' "
                "method: {}. Try using method='brute'.".format(
                    ", ".join(supported_classes_recursion)
                )
            )
        if response_method == "auto":
            response_method = "decision_function"

        if response_method != "decision_function":
            raise ValueError(
                "With the 'recursion' method, the response_method must be "
                "'decision_function'. Got {}.".format(response_method)
            )

    if sample_weight is not None:
        sample_weight = _check_sample_weight(sample_weight, X)

    if _determine_key_type(features, accept_slice=False) == "int":
        # _get_column_indices() supports negative indexing. Here, we limit
        # the indexing to be positive. The upper bound will be checked
        # by _get_column_indices()
        if np.any(np.less(features, 0)):
            raise ValueError("all features must be in [0, {}]".format(X.shape[1] - 1))

    features_indices = np.asarray(
        _get_column_indices(X, features), dtype=np.intp, order="C"
    ).ravel()

    feature_names = _check_feature_names(X, feature_names)

    n_features = X.shape[1]
    if categorical_features is None:
        is_categorical = [False] * len(features_indices)
    else:
        categorical_features = np.asarray(categorical_features)
        if categorical_features.size == 0:
            raise ValueError(
                "Passing an empty list (`[]`) to `categorical_features` is not "
                "supported. Use `None` instead to indicate that there are no "
                "categorical features."
            )
        if categorical_features.dtype.kind == "b":
            # categorical features provided as a list of boolean
            if categorical_features.size != n_features:
                raise ValueError(
                    "When `categorical_features` is a boolean array-like, "
                    "the array should be of shape (n_features,). Got "
                    f"{categorical_features.size} elements while `X` contains "
                    f"{n_features} features."
                )
            is_categorical = [categorical_features[idx] for idx in features_indices]
        elif categorical_features.dtype.kind in ("i", "O", "U"):
            # categorical features provided as a list of indices or feature names
            categorical_features_idx = [
                _get_feature_index(cat, feature_names=feature_names)
                for cat in categorical_features
            ]
            is_categorical = [
                idx in categorical_features_idx for idx in features_indices
            ]
        else:
            raise ValueError(
                "Expected `categorical_features` to be an array-like of boolean,"
                f" integer, or string. Got {categorical_features.dtype} instead."
            )

    custom_values = custom_values or {}
    if isinstance(features, (str, int)):
        features = [features]

    for feature_idx, feature, is_cat in zip(features_indices, features, is_categorical):
        if is_cat:
            continue

        if _safe_indexing(X, feature_idx, axis=1).dtype.kind in "iu":
            # TODO(1.9): raise a ValueError instead.
            warnings.warn(
                f"The column {feature!r} contains integer data. Partial "
                "dependence plots are not supported for integer data: this "
                "can lead to implicit rounding with NumPy arrays or even errors "
                "with newer pandas versions. Please convert numerical features "
                "to floating point dtypes ahead of time to avoid problems. "
                "This will raise ValueError in scikit-learn 1.9.",
                FutureWarning,
            )
            # Do not warn again for other features to avoid spamming the caller.
            break

    X_subset = _safe_indexing(X, features_indices, axis=1)

    # `custom_values` is keyed by elements of `features`; re-key it by the
    # positional index of each feature within `X_subset`.
    custom_values_for_X_subset = {
        index: custom_values.get(feature)
        for index, feature in enumerate(features)
        if feature in custom_values
    }

    grid, values = _grid_from_X(
        X_subset,
        percentiles,
        is_categorical,
        grid_resolution,
        custom_values_for_X_subset,
    )

    if method == "brute":
        averaged_predictions, predictions = _partial_dependence_brute(
            estimator, grid, features_indices, X, response_method, sample_weight
        )

        # reshape predictions to
        # (n_outputs, n_instances, n_values_feature_0, n_values_feature_1, ...)
        predictions = predictions.reshape(
            -1, X.shape[0], *[val.shape[0] for val in values]
        )
    else:
        averaged_predictions = _partial_dependence_recursion(
            estimator, grid, features_indices
        )

    # reshape averaged_predictions to
    # (n_outputs, n_values_feature_0, n_values_feature_1, ...)
    averaged_predictions = averaged_predictions.reshape(
        -1, *[val.shape[0] for val in values]
    )

    pdp_results = Bunch(grid_values=values)

    if kind == "average":
        pdp_results["average"] = averaged_predictions
    elif kind == "individual":
        pdp_results["individual"] = predictions
    else:  # kind='both'
        pdp_results["average"] = averaged_predictions
        pdp_results["individual"] = predictions

    return pdp_results

View File

@@ -0,0 +1,68 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
def _check_feature_names(X, feature_names=None):
"""Check feature names.
Parameters
----------
X : array-like of shape (n_samples, n_features)
Input data.
feature_names : None or array-like of shape (n_names,), dtype=str
Feature names to check or `None`.
Returns
-------
feature_names : list of str
Feature names validated. If `feature_names` is `None`, then a list of
feature names is provided, i.e. the column names of a pandas dataframe
or a generic list of feature names (e.g. `["x0", "x1", ...]`) for a
NumPy array.
"""
if feature_names is None:
if hasattr(X, "columns") and hasattr(X.columns, "tolist"):
# get the column names for a pandas dataframe
feature_names = X.columns.tolist()
else:
# define a list of numbered indices for a numpy array
feature_names = [f"x{i}" for i in range(X.shape[1])]
elif hasattr(feature_names, "tolist"):
# convert numpy array or pandas index to a list
feature_names = feature_names.tolist()
if len(set(feature_names)) != len(feature_names):
raise ValueError("feature_names should not contain duplicates.")
return feature_names
def _get_feature_index(fx, feature_names=None):
"""Get feature index.
Parameters
----------
fx : int or str
Feature index or name.
feature_names : list of str, default=None
All feature names from which to search the indices.
Returns
-------
idx : int
Feature index.
"""
if isinstance(fx, str):
if feature_names is None:
raise ValueError(
f"Cannot plot partial dependence for feature {fx!r} since "
"the list of feature names was not provided, neither as "
"column names of a pandas data-frame nor via the feature_names "
"parameter."
)
try:
return feature_names.index(fx)
except ValueError as e:
raise ValueError(f"Feature {fx!r} not in feature_names") from e
return fx

View File

@@ -0,0 +1,313 @@
"""Permutation importance for estimators."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numbers
import numpy as np
from ..ensemble._bagging import _generate_indices
from ..metrics import check_scoring, get_scorer_names
from ..model_selection._validation import _aggregate_score_dicts
from ..utils import Bunch, _safe_indexing, check_array, check_random_state
from ..utils._param_validation import (
HasMethods,
Integral,
Interval,
RealNotInt,
StrOptions,
validate_params,
)
from ..utils.parallel import Parallel, delayed
def _weights_scorer(scorer, estimator, X, y, sample_weight):
if sample_weight is not None:
return scorer(estimator, X, y, sample_weight=sample_weight)
return scorer(estimator, X, y)
def _calculate_permutation_scores(
    estimator,
    X,
    y,
    sample_weight,
    col_idx,
    random_state,
    n_repeats,
    scorer,
    max_samples,
):
    """Score `estimator` with column `col_idx` of `X` randomly permuted.

    Returns an ndarray of shape (n_repeats,), or a dict of such arrays for
    multi-metric scorers.
    """
    rng = check_random_state(random_state)

    # Operate on a private copy of X. This keeps the shuffle thread-safe
    # under threading-based parallelism, and also guarantees a writable
    # structure: with the 'loky' (default) or 'multiprocessing' joblib
    # backends a large X may arrive as a read-only memmap, whereas a copy is
    # always writable so its columns can be shuffled in place.
    if max_samples < X.shape[0]:
        # Evaluate on a random subsample (drawn without replacement) to keep
        # the computation tractable on large datasets.
        row_indices = _generate_indices(
            random_state=rng,
            bootstrap=False,
            n_population=X.shape[0],
            n_samples=max_samples,
        )
        X_permuted = _safe_indexing(X, row_indices, axis=0)
        y = _safe_indexing(y, row_indices, axis=0)
        if sample_weight is not None:
            sample_weight = _safe_indexing(sample_weight, row_indices, axis=0)
    else:
        X_permuted = X.copy()

    shuffling_idx = np.arange(X_permuted.shape[0])
    scores = []
    for _ in range(n_repeats):
        rng.shuffle(shuffling_idx)
        if hasattr(X_permuted, "iloc"):
            # pandas path: realign the shuffled column on the original index
            # so label-based assignment does not undo the permutation.
            col = X_permuted.iloc[shuffling_idx, col_idx]
            col.index = X_permuted.index
            X_permuted[X_permuted.columns[col_idx]] = col
        else:
            X_permuted[:, col_idx] = X_permuted[shuffling_idx, col_idx]
        scores.append(_weights_scorer(scorer, estimator, X_permuted, y, sample_weight))

    if isinstance(scores[0], dict):
        # Multi-metric scorer: merge the per-repeat dicts into dict-of-arrays.
        return _aggregate_score_dicts(scores)
    return np.array(scores)
def _create_importances_bunch(baseline_score, permuted_score):
    """Compute the importances as the decrease in score.

    Parameters
    ----------
    baseline_score : ndarray of shape (n_features,)
        The baseline score without permutation.
    permuted_score : ndarray of shape (n_features, n_repeats)
        The permuted scores for the `n` repetitions.

    Returns
    -------
    importances : :class:`~sklearn.utils.Bunch`
        Dictionary-like object, with the following attributes.
        importances_mean : ndarray, shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray, shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray, shape (n_features, n_repeats)
            Raw permutation importance scores.
    """
    # Importance of a feature = drop in score caused by permuting it.
    importances = baseline_score - permuted_score
    return Bunch(
        importances=importances,
        importances_mean=importances.mean(axis=1),
        importances_std=importances.std(axis=1),
    )
@validate_params(
    {
        "estimator": [HasMethods(["fit"])],
        "X": ["array-like"],
        "y": ["array-like", None],
        "scoring": [
            StrOptions(set(get_scorer_names())),
            callable,
            list,
            tuple,
            dict,
            None,
        ],
        "n_repeats": [Interval(Integral, 1, None, closed="left")],
        "n_jobs": [Integral, None],
        "random_state": ["random_state"],
        "sample_weight": ["array-like", None],
        "max_samples": [
            Interval(Integral, 1, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="right"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def permutation_importance(
    estimator,
    X,
    y,
    *,
    scoring=None,
    n_repeats=5,
    n_jobs=None,
    random_state=None,
    sample_weight=None,
    max_samples=1.0,
):
    """Permutation importance for feature evaluation [BRE]_.
    The :term:`estimator` is required to be a fitted estimator. `X` can be the
    data set used to train the estimator or a hold-out set. The permutation
    importance of a feature is calculated as follows. First, a baseline metric,
    defined by :term:`scoring`, is evaluated on a (potentially different)
    dataset defined by the `X`. Next, a feature column from the validation set
    is permuted and the metric is evaluated again. The permutation importance
    is defined to be the difference between the baseline metric and metric from
    permutating the feature column.
    Read more in the :ref:`User Guide <permutation_importance>`.
    Parameters
    ----------
    estimator : object
        An estimator that has already been :term:`fitted` and is compatible
        with :term:`scorer`.
    X : ndarray or DataFrame, shape (n_samples, n_features)
        Data on which permutation importance will be computed.
    y : array-like or None, shape (n_samples, ) or (n_samples, n_classes)
        Targets for supervised or `None` for unsupervised.
    scoring : str, callable, list, tuple, or dict, default=None
        Scorer to use.
        If `scoring` represents a single score, one can use:
        - str: see :ref:`scoring_string_names` for options.
        - callable: a scorer callable object (e.g., function) with signature
          ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
        - `None`: the `estimator`'s
          :ref:`default evaluation criterion <scoring_api_overview>` is used.
        If `scoring` represents multiple scores, one can use:
        - a list or tuple of unique strings;
        - a callable returning a dictionary where the keys are the metric
          names and the values are the metric scores;
        - a dictionary with metric names as keys and callables a values.
        Passing multiple scores to `scoring` is more efficient than calling
        `permutation_importance` for each of the scores as it reuses
        predictions to avoid redundant computation.
    n_repeats : int, default=5
        Number of times to permute a feature.
    n_jobs : int or None, default=None
        Number of jobs to run in parallel. The computation is done by computing
        permutation score for each columns and parallelized over the columns.
        `None` means 1 unless in a :obj:`joblib.parallel_backend` context.
        `-1` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.
    random_state : int, RandomState instance, default=None
        Pseudo-random number generator to control the permutations of each
        feature.
        Pass an int to get reproducible results across function calls.
        See :term:`Glossary <random_state>`.
    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights used in scoring.
        .. versionadded:: 0.24
    max_samples : int or float, default=1.0
        The number of samples to draw from X to compute feature importance
        in each repeat (without replacement).
        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.
        - If `max_samples` is equal to `1.0` or `X.shape[0]`, all samples
          will be used.
        While using this option may provide less accurate importance estimates,
        it keeps the method tractable when evaluating feature importance on
        large datasets. In combination with `n_repeats`, this allows to control
        the computational speed vs statistical accuracy trade-off of this method.
        .. versionadded:: 1.0
    Returns
    -------
    result : :class:`~sklearn.utils.Bunch` or dict of such instances
        Dictionary-like object, with the following attributes.
        importances_mean : ndarray of shape (n_features, )
            Mean of feature importance over `n_repeats`.
        importances_std : ndarray of shape (n_features, )
            Standard deviation over `n_repeats`.
        importances : ndarray of shape (n_features, n_repeats)
            Raw permutation importance scores.
        If there are multiple scoring metrics in the scoring parameter
        `result` is a dict with scorer names as keys (e.g. 'roc_auc') and
        `Bunch` objects like above as values.
    References
    ----------
    .. [BRE] :doi:`L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32,
        2001. <10.1023/A:1010933404324>`
    Examples
    --------
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.inspection import permutation_importance
    >>> X = [[1, 9, 9],[1, 9, 9],[1, 9, 9],
    ...      [0, 9, 9],[0, 9, 9],[0, 9, 9]]
    >>> y = [1, 1, 1, 0, 0, 0]
    >>> clf = LogisticRegression().fit(X, y)
    >>> result = permutation_importance(clf, X, y, n_repeats=10,
    ...                                 random_state=0)
    >>> result.importances_mean
    array([0.4666, 0.       , 0.       ])
    >>> result.importances_std
    array([0.2211, 0.       , 0.       ])
    """
    # Keep dataframes untouched (positional pandas indexing is used in
    # _calculate_permutation_scores); otherwise validate as an array, allowing
    # NaN values and performing no dtype conversion (dtype=None).
    if not hasattr(X, "iloc"):
        X = check_array(X, ensure_all_finite="allow-nan", dtype=None)
    # Precompute random seed from the random state to be used
    # to get a fresh independent RandomState instance for each
    # parallel call to _calculate_permutation_scores, irrespective of
    # the fact that variables are shared or not depending on the active
    # joblib backend (sequential, thread-based or process-based).
    random_state = check_random_state(random_state)
    random_seed = random_state.randint(np.iinfo(np.int32).max + 1)
    # A float `max_samples` in (0, 1] (enforced by validate_params) is a
    # fraction of the number of rows; an int must not exceed n_samples.
    if not isinstance(max_samples, numbers.Integral):
        max_samples = int(max_samples * X.shape[0])
    elif max_samples > X.shape[0]:
        raise ValueError("max_samples must be <= n_samples")
    scorer = check_scoring(estimator, scoring=scoring)
    # Reference score on the unpermuted data.
    baseline_score = _weights_scorer(scorer, estimator, X, y, sample_weight)
    # One parallel job per column; each job receives the same integer seed so
    # results do not depend on the joblib backend in use.
    scores = Parallel(n_jobs=n_jobs)(
        delayed(_calculate_permutation_scores)(
            estimator,
            X,
            y,
            sample_weight,
            col_idx,
            random_seed,
            n_repeats,
            scorer,
            max_samples,
        )
        for col_idx in range(X.shape[1])
    )
    # Multi-metric scoring yields a dict of Bunches, one per metric name.
    if isinstance(baseline_score, dict):
        return {
            name: _create_importances_bunch(
                baseline_score[name],
                # unpack the permuted scores
                np.array([scores[col_idx][name] for col_idx in range(X.shape[1])]),
            )
            for name in baseline_score
        }
    else:
        return _create_importances_bunch(baseline_score, np.array(scores))

View File

@@ -0,0 +1,2 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,564 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
import numpy as np
from ...base import is_regressor
from ...preprocessing import LabelEncoder
from ...utils import _safe_indexing
from ...utils._optional_dependencies import check_matplotlib_support
from ...utils._response import _get_response_values
from ...utils._set_output import _get_adapter_from_container
from ...utils.validation import (
_is_arraylike_not_scalar,
_is_pandas_df,
_is_polars_df,
_num_features,
check_is_fitted,
)
def _check_boundary_response_method(estimator, response_method, class_of_interest):
    """Validate the response methods to be used with the fitted estimator.

    Parameters
    ----------
    estimator : object
        Fitted estimator to check.
    response_method : {'auto', 'decision_function', 'predict_proba', 'predict'}
        Specifies whether to use :term:`decision_function`,
        :term:`predict_proba`, :term:`predict` as the target response. If set
        to 'auto', the response method is tried in the before mentioned order.
    class_of_interest : int, float, bool, str or None
        The class considered when plotting the decision. Cannot be None if
        multiclass and `response_method` is 'predict_proba' or
        'decision_function'.

        .. versionadded:: 1.4

    Returns
    -------
    prediction_method : list of str or str
        The name or list of names of the response methods to use.
    """
    # An array-like first class label is the signature of multi-label /
    # multi-output multi-class classifiers, which are not supported here.
    if hasattr(estimator, "classes_") and _is_arraylike_not_scalar(
        estimator.classes_[0]
    ):
        raise ValueError(
            "Multi-label and multi-output multi-class classifiers are not supported"
        )
    # An explicit response method is passed through unchanged.
    if response_method != "auto":
        return response_method
    if is_regressor(estimator):
        return "predict"
    # Classifiers: try the richer response methods first.
    return ["decision_function", "predict_proba", "predict"]
class DecisionBoundaryDisplay:
    """Decisions boundary visualization.
    It is recommended to use
    :func:`~sklearn.inspection.DecisionBoundaryDisplay.from_estimator`
    to create a :class:`DecisionBoundaryDisplay`. All parameters are stored as
    attributes.
    Read more in the :ref:`User Guide <visualizations>`.
    For a detailed example comparing the decision boundaries of multinomial and
    one-vs-rest logistic regression, please see
    :ref:`sphx_glr_auto_examples_linear_model_plot_logistic_multinomial.py`.
    .. versionadded:: 1.1
    Parameters
    ----------
    xx0 : ndarray of shape (grid_resolution, grid_resolution)
        First output of :func:`meshgrid <numpy.meshgrid>`.
    xx1 : ndarray of shape (grid_resolution, grid_resolution)
        Second output of :func:`meshgrid <numpy.meshgrid>`.
    response : ndarray of shape (grid_resolution, grid_resolution) or \
            (grid_resolution, grid_resolution, n_classes)
        Values of the response function.
    multiclass_colors : list of str or str, default=None
        Specifies how to color each class when plotting all classes of multiclass
        problem. Ignored for binary problems and multiclass problems when plotting a
        single prediction value per point.
        Possible inputs are:
        * list: list of Matplotlib
          `color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
          strings, of length `n_classes`
        * str: name of :class:`matplotlib.colors.Colormap`
        * None: 'viridis' colormap is used to sample colors
        Single color colormaps will be generated from the colors in the list or
        colors taken from the colormap and passed to the `cmap` parameter of
        the `plot_method`.
        .. versionadded:: 1.7
    xlabel : str, default=None
        Default label to place on x axis.
    ylabel : str, default=None
        Default label to place on y axis.
    Attributes
    ----------
    surface_ : matplotlib `QuadContourSet` or `QuadMesh` or list of such objects
        If `plot_method` is 'contour' or 'contourf', `surface_` is
        :class:`QuadContourSet <matplotlib.contour.QuadContourSet>`. If
        `plot_method` is 'pcolormesh', `surface_` is
        :class:`QuadMesh <matplotlib.collections.QuadMesh>`.
    multiclass_colors_ : array of shape (n_classes, 4)
        Colors used to plot each class in multiclass problems.
        Only defined when `color_of_interest` is None.
        .. versionadded:: 1.7
    ax_ : matplotlib Axes
        Axes with decision boundary.
    figure_ : matplotlib Figure
        Figure containing the decision boundary.
    See Also
    --------
    DecisionBoundaryDisplay.from_estimator : Plot decision boundary given an estimator.
    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.inspection import DecisionBoundaryDisplay
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> iris = load_iris()
    >>> feature_1, feature_2 = np.meshgrid(
    ...     np.linspace(iris.data[:, 0].min(), iris.data[:, 0].max()),
    ...     np.linspace(iris.data[:, 1].min(), iris.data[:, 1].max())
    ... )
    >>> grid = np.vstack([feature_1.ravel(), feature_2.ravel()]).T
    >>> tree = DecisionTreeClassifier().fit(iris.data[:, :2], iris.target)
    >>> y_pred = np.reshape(tree.predict(grid), feature_1.shape)
    >>> display = DecisionBoundaryDisplay(
    ...     xx0=feature_1, xx1=feature_2, response=y_pred
    ... )
    >>> display.plot()
    <...>
    >>> display.ax_.scatter(
    ...     iris.data[:, 0], iris.data[:, 1], c=iris.target, edgecolor="black"
    ... )
    <...>
    >>> plt.show()
    """
    def __init__(
        self, *, xx0, xx1, response, multiclass_colors=None, xlabel=None, ylabel=None
    ):
        # Only store the precomputed grid and response; all rendering is
        # deferred to `plot`.
        self.xx0 = xx0
        self.xx1 = xx1
        self.response = response
        self.multiclass_colors = multiclass_colors
        self.xlabel = xlabel
        self.ylabel = ylabel
    def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwargs):
        """Plot visualization.
        Parameters
        ----------
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.
        xlabel : str, default=None
            Overwrite the x-axis label.
        ylabel : str, default=None
            Overwrite the y-axis label.
        **kwargs : dict
            Additional keyword arguments to be passed to the `plot_method`.
        Returns
        -------
        display: :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores computed values.
        """
        check_matplotlib_support("DecisionBoundaryDisplay.plot")
        import matplotlib as mpl
        import matplotlib.pyplot as plt
        if plot_method not in ("contourf", "contour", "pcolormesh"):
            raise ValueError(
                "plot_method must be 'contourf', 'contour', or 'pcolormesh'. "
                f"Got {plot_method} instead."
            )
        if ax is None:
            _, ax = plt.subplots()
        plot_func = getattr(ax, plot_method)
        if self.response.ndim == 2:
            # Single response value per grid point: draw the surface directly.
            self.surface_ = plot_func(self.xx0, self.xx1, self.response, **kwargs)
        else:  # self.response.ndim == 3
            # One response column per class (multiclass predict_proba or
            # decision_function); colors come from `multiclass_colors`.
            n_responses = self.response.shape[-1]
            for kwarg in ("cmap", "colors"):
                if kwarg in kwargs:
                    warnings.warn(
                        f"'{kwarg}' is ignored in favor of 'multiclass_colors' "
                        "in the multiclass case when the response method is "
                        "'decision_function' or 'predict_proba'."
                    )
                    del kwargs[kwarg]
            if self.multiclass_colors is None or isinstance(
                self.multiclass_colors, str
            ):
                if self.multiclass_colors is None:
                    cmap = "tab10" if n_responses <= 10 else "gist_rainbow"
                else:
                    cmap = self.multiclass_colors
                # Special case for the tab10 and tab20 colormaps that encode a
                # discrete set of colors that are easily distinguishable
                # contrary to other colormaps that are continuous.
                if cmap == "tab10" and n_responses <= 10:
                    colors = plt.get_cmap("tab10", 10).colors[:n_responses]
                elif cmap == "tab20" and n_responses <= 20:
                    colors = plt.get_cmap("tab20", 20).colors[:n_responses]
                else:
                    cmap = plt.get_cmap(cmap, n_responses)
                    if not hasattr(cmap, "colors"):
                        # For LinearSegmentedColormap
                        colors = cmap(np.linspace(0, 1, n_responses))
                    else:
                        colors = cmap.colors
            elif isinstance(self.multiclass_colors, list):
                colors = [mpl.colors.to_rgba(color) for color in self.multiclass_colors]
            else:
                raise ValueError("'multiclass_colors' must be a list or a str.")
            self.multiclass_colors_ = colors
            if plot_method == "contour":
                # Plot only argmax map for contour
                class_map = self.response.argmax(axis=2)
                self.surface_ = plot_func(
                    self.xx0, self.xx1, class_map, colors=colors, **kwargs
                )
            else:
                # Build one white-to-color colormap per class, then draw each
                # class's response only where that class is the argmax (all
                # other grid points are masked out).
                multiclass_cmaps = [
                    mpl.colors.LinearSegmentedColormap.from_list(
                        f"colormap_{class_idx}", [(1.0, 1.0, 1.0, 1.0), (r, g, b, 1.0)]
                    )
                    for class_idx, (r, g, b, _) in enumerate(colors)
                ]
                self.surface_ = []
                for class_idx, cmap in enumerate(multiclass_cmaps):
                    response = np.ma.array(
                        self.response[:, :, class_idx],
                        mask=~(self.response.argmax(axis=2) == class_idx),
                    )
                    self.surface_.append(
                        plot_func(self.xx0, self.xx1, response, cmap=cmap, **kwargs)
                    )
        # Only overwrite an axis label when the caller supplied one or the
        # axis does not have a label yet.
        if xlabel is not None or not ax.get_xlabel():
            xlabel = self.xlabel if xlabel is None else xlabel
            ax.set_xlabel(xlabel)
        if ylabel is not None or not ax.get_ylabel():
            ylabel = self.ylabel if ylabel is None else ylabel
            ax.set_ylabel(ylabel)
        self.ax_ = ax
        self.figure_ = ax.figure
        return self
    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        *,
        grid_resolution=100,
        eps=1.0,
        plot_method="contourf",
        response_method="auto",
        class_of_interest=None,
        multiclass_colors=None,
        xlabel=None,
        ylabel=None,
        ax=None,
        **kwargs,
    ):
        """Plot decision boundary given an estimator.
        Read more in the :ref:`User Guide <visualizations>`.
        Parameters
        ----------
        estimator : object
            Trained estimator used to plot the decision boundary.
        X : {array-like, sparse matrix, dataframe} of shape (n_samples, 2)
            Input data that should be only 2-dimensional.
        grid_resolution : int, default=100
            Number of grid points to use for plotting decision boundary.
            Higher values will make the plot look nicer but be slower to
            render.
        eps : float, default=1.0
            Extends the minimum and maximum values of X for evaluating the
            response function.
        plot_method : {'contourf', 'contour', 'pcolormesh'}, default='contourf'
            Plotting method to call when plotting the response. Please refer
            to the following matplotlib documentation for details:
            :func:`contourf <matplotlib.pyplot.contourf>`,
            :func:`contour <matplotlib.pyplot.contour>`,
            :func:`pcolormesh <matplotlib.pyplot.pcolormesh>`.
        response_method : {'auto', 'decision_function', 'predict_proba', \
                'predict'}, default='auto'
            Specifies whether to use :term:`decision_function`,
            :term:`predict_proba` or :term:`predict` as the target response.
            If set to 'auto', the response method is tried in the order as
            listed above.
            .. versionchanged:: 1.6
                For multiclass problems, 'auto' no longer defaults to 'predict'.
        class_of_interest : int, float, bool or str, default=None
            The class to be plotted when `response_method` is 'predict_proba'
            or 'decision_function'. If None, `estimator.classes_[1]` is considered
            the positive class for binary classifiers. For multiclass
            classifiers, if None, all classes will be represented in the
            decision boundary plot; the class with the highest response value
            at each point is plotted. The color of each class can be set via
            `multiclass_colors`.
            .. versionadded:: 1.4
        multiclass_colors : list of str, or str, default=None
            Specifies how to color each class when plotting multiclass
            'predict_proba' or 'decision_function' and `class_of_interest` is
            None. Ignored in all other cases.
            Possible inputs are:
            * list: list of Matplotlib
              `color <https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def>`_
              strings, of length `n_classes`
            * str: name of :class:`matplotlib.colors.Colormap`
            * None: 'tab10' colormap is used to sample colors if the number of
              classes is less than or equal to 10, otherwise 'gist_rainbow'
              colormap.
            Single color colormaps will be generated from the colors in the list or
            colors taken from the colormap, and passed to the `cmap` parameter of
            the `plot_method`.
            .. versionadded:: 1.7
        xlabel : str, default=None
            The label used for the x-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.
        ylabel : str, default=None
            The label used for the y-axis. If `None`, an attempt is made to
            extract a label from `X` if it is a dataframe, otherwise an empty
            string is used.
        ax : Matplotlib axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.
        **kwargs : dict
            Additional keyword arguments to be passed to the
            `plot_method`.
        Returns
        -------
        display : :class:`~sklearn.inspection.DecisionBoundaryDisplay`
            Object that stores the result.
        See Also
        --------
        DecisionBoundaryDisplay : Decision boundary visualization.
        sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the
            confusion matrix given an estimator, the data, and the label.
        sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the
            confusion matrix given the true and predicted labels.
        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.linear_model import LogisticRegression
        >>> from sklearn.inspection import DecisionBoundaryDisplay
        >>> iris = load_iris()
        >>> X = iris.data[:, :2]
        >>> classifier = LogisticRegression().fit(X, iris.target)
        >>> disp = DecisionBoundaryDisplay.from_estimator(
        ...     classifier, X, response_method="predict",
        ...     xlabel=iris.feature_names[0], ylabel=iris.feature_names[1],
        ...     alpha=0.5,
        ... )
        >>> disp.ax_.scatter(X[:, 0], X[:, 1], c=iris.target, edgecolor="k")
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")
        check_is_fitted(estimator)
        import matplotlib as mpl
        if not grid_resolution > 1:
            raise ValueError(
                "grid_resolution must be greater than 1. Got"
                f" {grid_resolution} instead."
            )
        if not eps >= 0:
            raise ValueError(
                f"eps must be greater than or equal to 0. Got {eps} instead."
            )
        possible_plot_methods = ("contourf", "contour", "pcolormesh")
        if plot_method not in possible_plot_methods:
            available_methods = ", ".join(possible_plot_methods)
            raise ValueError(
                f"plot_method must be one of {available_methods}. "
                f"Got {plot_method} instead."
            )
        num_features = _num_features(X)
        if num_features != 2:
            raise ValueError(
                f"n_features must be equal to 2. Got {num_features} instead."
            )
        # `multiclass_colors` is only validated when a soft response method
        # may be used and the fitted classifier has more than two classes.
        if (
            response_method in ("predict_proba", "decision_function", "auto")
            and multiclass_colors is not None
            and hasattr(estimator, "classes_")
            and (n_classes := len(estimator.classes_)) > 2
        ):
            if isinstance(multiclass_colors, list):
                if len(multiclass_colors) != n_classes:
                    raise ValueError(
                        "When 'multiclass_colors' is a list, it must be of the same "
                        f"length as 'estimator.classes_' ({n_classes}), got: "
                        f"{len(multiclass_colors)}."
                    )
                elif any(
                    not mpl.colors.is_color_like(col) for col in multiclass_colors
                ):
                    raise ValueError(
                        "When 'multiclass_colors' is a list, it can only contain valid"
                        f" Matplotlib color names. Got: {multiclass_colors}"
                    )
            if isinstance(multiclass_colors, str):
                if multiclass_colors not in mpl.pyplot.colormaps():
                    raise ValueError(
                        "When 'multiclass_colors' is a string, it must be a valid "
                        f"Matplotlib colormap. Got: {multiclass_colors}"
                    )
        # Build the evaluation mesh spanning the data range extended by `eps`.
        x0, x1 = _safe_indexing(X, 0, axis=1), _safe_indexing(X, 1, axis=1)
        x0_min, x0_max = x0.min() - eps, x0.max() + eps
        x1_min, x1_max = x1.min() - eps, x1.max() + eps
        xx0, xx1 = np.meshgrid(
            np.linspace(x0_min, x0_max, grid_resolution),
            np.linspace(x1_min, x1_max, grid_resolution),
        )
        # Flatten the mesh into an (n_grid_points, 2) design matrix.
        X_grid = np.c_[xx0.ravel(), xx1.ravel()]
        if _is_pandas_df(X) or _is_polars_df(X):
            # Rebuild a container carrying the original column names --
            # presumably so estimators fitted on dataframes see consistent
            # feature names (NOTE(review): confirm against estimator checks).
            adapter = _get_adapter_from_container(X)
            X_grid = adapter.create_container(
                X_grid,
                X_grid,
                columns=X.columns,
            )
        prediction_method = _check_boundary_response_method(
            estimator, response_method, class_of_interest
        )
        try:
            response, _, response_method_used = _get_response_values(
                estimator,
                X_grid,
                response_method=prediction_method,
                pos_label=class_of_interest,
                return_response_method_used=True,
            )
        except ValueError as exc:
            if "is not a valid label" in str(exc):
                # re-raise a more informative error message since `pos_label` is unknown
                # to our user when interacting with
                # `DecisionBoundaryDisplay.from_estimator`
                raise ValueError(
                    f"class_of_interest={class_of_interest} is not a valid label: It "
                    f"should be one of {estimator.classes_}"
                ) from exc
            raise
        # convert classes predictions into integers
        if response_method_used == "predict" and hasattr(estimator, "classes_"):
            encoder = LabelEncoder()
            encoder.classes_ = estimator.classes_
            response = encoder.transform(response)
        if response.ndim == 1:
            response = response.reshape(*xx0.shape)
        else:
            if is_regressor(estimator):
                raise ValueError("Multi-output regressors are not supported")
            if class_of_interest is not None:
                # For the multiclass case, `_get_response_values` returns the response
                # as-is. Thus, we have a column per class and we need to select the
                # column corresponding to the positive class.
                col_idx = np.flatnonzero(estimator.classes_ == class_of_interest)[0]
                response = response[:, col_idx].reshape(*xx0.shape)
            else:
                response = response.reshape(*xx0.shape, response.shape[-1])
        # Fall back to dataframe column names for the axis labels when the
        # caller did not provide any.
        if xlabel is None:
            xlabel = X.columns[0] if hasattr(X, "columns") else ""
        if ylabel is None:
            ylabel = X.columns[1] if hasattr(X, "columns") else ""
        display = cls(
            xx0=xx0,
            xx1=xx1,
            response=response,
            multiclass_colors=multiclass_colors,
            xlabel=xlabel,
            ylabel=ylabel,
        )
        return display.plot(ax=ax, plot_method=plot_method, **kwargs)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,710 @@
import warnings
import numpy as np
import pytest
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.ensemble import IsolationForest
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import scale
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
from sklearn.utils.fixes import parse_version
# Shared 2-feature binary classification dataset used by most tests below.
X, y = make_classification(
    n_informative=1,
    n_redundant=1,
    n_clusters_per_class=1,
    n_features=2,
    random_state=42,
)
def load_iris_2d_scaled():
    """Return the first two standardized iris features and the targets."""
    features, target = load_iris(return_X_y=True)
    return scale(features)[:, :2], target
@pytest.fixture(scope="module")
def fitted_clf():
    """Module-scoped logistic regression fitted on the shared (X, y) data."""
    return LogisticRegression().fit(X, y)
def test_input_data_dimension(pyplot):
    """Check that we raise an error when `X` does not have exactly 2 features."""
    # Four features cannot be rendered on a 2-D decision boundary plot.
    data, target = make_classification(n_samples=10, n_features=4, random_state=0)
    estimator = LogisticRegression().fit(data, target)
    expected_msg = "n_features must be equal to 2. Got 4 instead."
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(estimator=estimator, X=data)
def test_check_boundary_response_method_error():
    """Check error raised for multi-output multi-class classifiers by
    `_check_boundary_response_method`.
    """

    # A classifier whose `classes_` entries are themselves arrays emulates a
    # multi-label / multi-output multi-class estimator.
    class MultiLabelClassifier:
        classes_ = [np.array([0, 1]), np.array([0, 1])]

    expected = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=expected):
        _check_boundary_response_method(MultiLabelClassifier(), "predict", None)
@pytest.mark.parametrize(
    "estimator, response_method, class_of_interest, expected_prediction_method",
    [
        (DecisionTreeRegressor(), "predict", None, "predict"),
        (DecisionTreeRegressor(), "auto", None, "predict"),
        (LogisticRegression().fit(*load_iris_2d_scaled()), "predict", None, "predict"),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "predict_proba",
            0,
            "predict_proba",
        ),
        (
            LogisticRegression().fit(*load_iris_2d_scaled()),
            "decision_function",
            0,
            "decision_function",
        ),
        (
            LogisticRegression().fit(X, y),
            "auto",
            None,
            ["decision_function", "predict_proba", "predict"],
        ),
        (LogisticRegression().fit(X, y), "predict", None, "predict"),
        (
            LogisticRegression().fit(X, y),
            ["predict_proba", "decision_function"],
            None,
            ["predict_proba", "decision_function"],
        ),
    ],
)
def test_check_boundary_response_method(
    estimator, response_method, class_of_interest, expected_prediction_method
):
    """Check the behaviour of `_check_boundary_response_method` for the supported
    cases.
    """
    # Non-"auto" values must be passed through unchanged; "auto" resolves to
    # "predict" for regressors and to the full preference list for classifiers.
    prediction_method = _check_boundary_response_method(
        estimator, response_method, class_of_interest
    )
    assert prediction_method == expected_prediction_method
def test_multiclass_predict(pyplot):
    """Check multiclass `response=predict` gives expected results."""
    grid_resolution = 10
    eps = 1.0
    X, y = make_classification(n_classes=3, n_informative=3, random_state=0)
    X = X[:, [0, 1]]
    lr = LogisticRegression(random_state=0).fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        lr, X, response_method="predict", grid_resolution=grid_resolution, eps=1.0
    )
    # Rebuild the evaluation mesh with the same resolution and eps so the
    # expected response can be computed independently of the display.
    x0_min, x0_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    x1_min, x1_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx0, xx1 = np.meshgrid(
        np.linspace(x0_min, x0_max, grid_resolution),
        np.linspace(x1_min, x1_max, grid_resolution),
    )
    response = lr.predict(np.c_[xx0.ravel(), xx1.ravel()])
    assert_allclose(disp.response, response.reshape(xx0.shape))
    assert_allclose(disp.xx0, xx0)
    assert_allclose(disp.xx1, xx1)
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"plot_method": "hello_world"},
            r"plot_method must be one of contourf, contour, pcolormesh. Got hello_world"
            r" instead.",
        ),
        (
            {"grid_resolution": 1},
            r"grid_resolution must be greater than 1. Got 1 instead",
        ),
        (
            {"grid_resolution": -1},
            r"grid_resolution must be greater than 1. Got -1 instead",
        ),
        ({"eps": -1.1}, r"eps must be greater than or equal to 0. Got -1.1 instead"),
    ],
)
def test_input_validation_errors(pyplot, kwargs, error_msg, fitted_clf):
    """Check input validation from_estimator."""
    # Each invalid keyword must be rejected by `from_estimator`'s up-front
    # validation, before any response values are computed.
    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(fitted_clf, X, **kwargs)
@pytest.mark.parametrize(
    "kwargs, error_msg",
    [
        (
            {"multiclass_colors": {"dict": "not_list"}},
            "'multiclass_colors' must be a list or a str.",
        ),
        ({"multiclass_colors": "not_cmap"}, "it must be a valid Matplotlib colormap"),
        ({"multiclass_colors": ["red", "green"]}, "it must be of the same length"),
        (
            {"multiclass_colors": ["red", "green", "not color"]},
            "it can only contain valid Matplotlib color names",
        ),
    ],
)
def test_input_validation_errors_multiclass_colors(pyplot, kwargs, error_msg):
    """Check input validation for `multiclass_colors` in `from_estimator`."""
    # A 3-class (iris) problem is required: `multiclass_colors` is only
    # validated when the fitted classifier has more than two classes.
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    with pytest.raises(ValueError, match=error_msg):
        DecisionBoundaryDisplay.from_estimator(clf, X, **kwargs)
def test_display_plot_input_error(pyplot, fitted_clf):
    """Check input validation for `plot`."""
    disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, X, grid_resolution=5)
    # `plot` re-validates `plot_method` on its own, independently of the
    # validation performed by `from_estimator`.
    with pytest.raises(ValueError, match="plot_method must be 'contourf'"):
        disp.plot(plot_method="hello_world")
@pytest.mark.parametrize(
    "response_method", ["auto", "predict", "predict_proba", "decision_function"]
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_classifier(
    pyplot, fitted_clf, response_method, plot_method
):
    """Check that decision boundary is correct."""
    fig, ax = pyplot.subplots()
    eps = 2.0
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The evaluation grid must span the data range extended by eps on each side.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
    fig2, ax2 = pyplot.subplots()
    # change plotting method for second plot
    disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == ax2
    assert disp.figure_ == fig2
@pytest.mark.parametrize("response_method", ["auto", "predict", "decision_function"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_outlier_detector(
    pyplot, response_method, plot_method
):
    """Check that decision boundary is correct for outlier detector."""
    fig, ax = pyplot.subplots()
    eps = 2.0
    # IsolationForest ignores y; it is passed here only for API uniformity.
    outlier_detector = IsolationForest(random_state=0).fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        outlier_detector,
        X,
        grid_resolution=5,
        response_method=response_method,
        plot_method=plot_method,
        eps=eps,
        ax=ax,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The grid must extend `eps` beyond the data range in every direction.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
@pytest.mark.parametrize("response_method", ["auto", "predict"])
@pytest.mark.parametrize("plot_method", ["contourf", "contour"])
def test_decision_boundary_display_regressor(pyplot, response_method, plot_method):
    """Check that we can display the decision boundary for a regressor."""
    # Keep only the first two features since the display is 2D.
    X, y = load_diabetes(return_X_y=True)
    X = X[:, :2]
    tree = DecisionTreeRegressor().fit(X, y)
    fig, ax = pyplot.subplots()
    eps = 2.0
    disp = DecisionBoundaryDisplay.from_estimator(
        tree,
        X,
        response_method=response_method,
        ax=ax,
        eps=eps,
        plot_method=plot_method,
    )
    assert isinstance(disp.surface_, pyplot.matplotlib.contour.QuadContourSet)
    assert disp.ax_ == ax
    assert disp.figure_ == fig
    x0, x1 = X[:, 0], X[:, 1]
    # The grid must extend `eps` beyond the data range in every direction.
    x0_min, x0_max = x0.min() - eps, x0.max() + eps
    x1_min, x1_max = x1.min() - eps, x1.max() + eps
    assert disp.xx0.min() == pytest.approx(x0_min)
    assert disp.xx0.max() == pytest.approx(x0_max)
    assert disp.xx1.min() == pytest.approx(x1_min)
    assert disp.xx1.max() == pytest.approx(x1_max)
    fig2, ax2 = pyplot.subplots()
    # change plotting method for second plot
    disp.plot(plot_method="pcolormesh", ax=ax2, shading="auto")
    assert isinstance(disp.surface_, pyplot.matplotlib.collections.QuadMesh)
    assert disp.ax_ == ax2
    assert disp.figure_ == fig2
@pytest.mark.parametrize(
    "response_method, msg",
    [
        (
            "predict_proba",
            "MyClassifier has none of the following attributes: predict_proba",
        ),
        (
            "decision_function",
            "MyClassifier has none of the following attributes: decision_function",
        ),
        (
            "auto",
            (
                "MyClassifier has none of the following attributes: decision_function, "
                "predict_proba, predict"
            ),
        ),
        (
            "bad_method",
            "MyClassifier has none of the following attributes: bad_method",
        ),
    ],
)
def test_error_bad_response(pyplot, response_method, msg):
    """Check errors for bad response."""
    # Minimal classifier that exposes no prediction method at all, so any
    # requested response_method must raise an AttributeError.
    class MyClassifier(ClassifierMixin, BaseEstimator):
        def fit(self, X, y):
            self.fitted_ = True
            self.classes_ = [0, 1]
            return self
    clf = MyClassifier().fit(X, y)
    with pytest.raises(AttributeError, match=msg):
        DecisionBoundaryDisplay.from_estimator(clf, X, response_method=response_method)
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multilabel_classifier_error(pyplot, response_method):
    """Check that multilabel classifier raises correct error."""
    X, y = make_multilabel_classification(random_state=0)
    # Keep only two features since the display is 2D.
    X = X[:, :2]
    tree = DecisionTreeClassifier().fit(X, y)
    msg = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=msg):
        DecisionBoundaryDisplay.from_estimator(
            tree,
            X,
            response_method=response_method,
        )
@pytest.mark.parametrize("response_method", ["auto", "predict", "predict_proba"])
def test_multi_output_multi_class_classifier_error(pyplot, response_method):
    """Check that multi-output multi-class classifier raises correct error."""
    X = np.asarray([[0, 1], [1, 2]])
    # Two string outputs per sample -> multi-output multi-class target.
    y = np.asarray([["tree", "cat"], ["cat", "tree"]])
    tree = DecisionTreeClassifier().fit(X, y)
    msg = "Multi-label and multi-output multi-class classifiers are not supported"
    with pytest.raises(ValueError, match=msg):
        DecisionBoundaryDisplay.from_estimator(
            tree,
            X,
            response_method=response_method,
        )
def test_multioutput_regressor_error(pyplot):
    """Check that multioutput regressor raises correct error."""
    features = np.asarray([[0, 1], [1, 2]])
    # Two continuous outputs per sample -> multi-output regression target.
    targets = np.asarray([[0, 1], [4, 1]])
    regressor = DecisionTreeRegressor().fit(features, targets)
    expected_msg = "Multi-output regressors are not supported"
    with pytest.raises(ValueError, match=expected_msg):
        DecisionBoundaryDisplay.from_estimator(
            regressor, features, response_method="predict"
        )
@pytest.mark.parametrize(
    "response_method",
    ["predict_proba", "decision_function", ["predict_proba", "predict"]],
)
def test_regressor_unsupported_response(pyplot, response_method):
    """Check that requesting a classifier-only response method for a regressor
    raises an informative error."""
    X, y = load_diabetes(return_X_y=True)
    X = X[:, :2]
    tree = DecisionTreeRegressor().fit(X, y)
    err_msg = "should either be a classifier to be used with response_method"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(tree, X, response_method=response_method)
@pytest.mark.filterwarnings(
    # We expect to raise the following warning because the classifier is fit on a
    # NumPy array
    "ignore:X has feature names, but LogisticRegression was fitted without"
)
def test_dataframe_labels_used(pyplot, fitted_clf):
    """Check that column names are used for pandas."""
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame(X, columns=["col_x", "col_y"])
    # pandas column names are used by default
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(fitted_clf, df, ax=ax)
    assert ax.get_xlabel() == "col_x"
    assert ax.get_ylabel() == "col_y"
    # second call to plot will have the names
    fig, ax = pyplot.subplots()
    disp.plot(ax=ax)
    assert ax.get_xlabel() == "col_x"
    assert ax.get_ylabel() == "col_y"
    # axes with a label will not get overridden
    fig, ax = pyplot.subplots()
    ax.set(xlabel="hello", ylabel="world")
    disp.plot(ax=ax)
    assert ax.get_xlabel() == "hello"
    assert ax.get_ylabel() == "world"
    # labels get overridden only if provided to the `plot` method
    disp.plot(ax=ax, xlabel="overwritten_x", ylabel="overwritten_y")
    assert ax.get_xlabel() == "overwritten_x"
    assert ax.get_ylabel() == "overwritten_y"
    # labels do not get inferred if provided to `from_estimator`
    _, ax = pyplot.subplots()
    disp = DecisionBoundaryDisplay.from_estimator(
        fitted_clf, df, ax=ax, xlabel="overwritten_x", ylabel="overwritten_y"
    )
    assert ax.get_xlabel() == "overwritten_x"
    assert ax.get_ylabel() == "overwritten_y"
def test_string_target(pyplot):
    """Check that decision boundary works with classifiers trained on string labels."""
    iris = load_iris()
    features = iris.data[:, [0, 1]]
    # Encode the target as class-name strings instead of integer codes.
    string_labels = iris.target_names[iris.target]
    classifier = LogisticRegression().fit(features, string_labels)
    # Should complete without raising.
    DecisionBoundaryDisplay.from_estimator(
        classifier,
        features,
        grid_resolution=5,
        response_method="predict",
    )
@pytest.mark.parametrize("constructor_name", ["pandas", "polars"])
def test_dataframe_support(pyplot, constructor_name):
    """Check that passing a dataframe at fit and to the Display does not
    raise warnings.
    Non-regression test for:
    * https://github.com/scikit-learn/scikit-learn/issues/23311
    * https://github.com/scikit-learn/scikit-learn/issues/28717
    """
    df = _convert_container(
        X, constructor_name=constructor_name, columns_name=["col_x", "col_y"]
    )
    estimator = LogisticRegression().fit(df, y)
    with warnings.catch_warnings():
        # no warnings linked to feature names validation should be raised
        warnings.simplefilter("error", UserWarning)
        DecisionBoundaryDisplay.from_estimator(estimator, df, response_method="predict")
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_binary(pyplot, response_method):
    """Check the behaviour of passing `class_of_interest` for plotting the output of
    `predict_proba` and `decision_function` in the binary case.
    """
    # Restrict iris to the first two classes to get a binary problem.
    iris = load_iris()
    X = iris.data[:100, :2]
    y = iris.target[:100]
    assert_array_equal(np.unique(y), [0, 1])
    estimator = LogisticRegression().fit(X, y)
    # We will check that `class_of_interest=None` is equivalent to
    # `class_of_interest=estimator.classes_[1]`
    disp_default = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=None,
    )
    disp_class_1 = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=estimator.classes_[1],
    )
    assert_allclose(disp_default.response, disp_class_1.response)
    # we can check that `_get_response_values` modifies the response when targeting
    # the other class, i.e. 1 - p(y=1|x) for `predict_proba` and -decision_function
    # for `decision_function`.
    disp_class_0 = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=estimator.classes_[0],
    )
    if response_method == "predict_proba":
        assert_allclose(disp_default.response, 1 - disp_class_0.response)
    else:
        assert response_method == "decision_function"
        assert_allclose(disp_default.response, -disp_class_0.response)
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_class_of_interest_multiclass(pyplot, response_method):
    """Check the behaviour of passing `class_of_interest` for plotting the output of
    `predict_proba` and `decision_function` in the multiclass case.
    """
    iris = load_iris()
    X = iris.data[:, :2]
    y = iris.target  # the target are numerical labels
    class_of_interest_idx = 2
    estimator = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=class_of_interest_idx,
    )
    # we will check that we plot the expected values as response
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
    assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # make the same test but this time using target as strings
    y = iris.target_names[iris.target]
    estimator = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        estimator,
        X,
        response_method=response_method,
        class_of_interest=iris.target_names[class_of_interest_idx],
    )
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(estimator, response_method)(grid)[:, class_of_interest_idx]
    assert_allclose(response.reshape(*disp.response.shape), disp.response)
    # check that we raise an error for unknown labels
    # this test should already be handled in `_get_response_values` but we can have this
    # test here as well
    # NOTE: the estimator is now fitted on string labels, so the integer label
    # `2` is no longer a valid class and must be rejected.
    err_msg = "class_of_interest=2 is not a valid label: It should be one of"
    with pytest.raises(ValueError, match=err_msg):
        DecisionBoundaryDisplay.from_estimator(
            estimator,
            X,
            response_method=response_method,
            class_of_interest=class_of_interest_idx,
        )
@pytest.mark.parametrize("response_method", ["predict_proba", "decision_function"])
def test_multiclass_plot_max_class(pyplot, response_method):
    """Check plot correct when plotting max multiclass class."""
    import matplotlib as mpl
    # In matplotlib < v3.5, default value of `pcolormesh(shading)` is 'flat', which
    # results in the last row and column being dropped. Thus older versions produce
    # a 99x99 grid, while newer versions produce a 100x100 grid.
    if parse_version(mpl.__version__) < parse_version("3.5"):
        pytest.skip("`pcolormesh` in Matplotlib >= 3.5 gives smaller grid size.")
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method="pcolormesh",
        response_method=response_method,
    )
    grid = np.concatenate([disp.xx0.reshape(-1, 1), disp.xx1.reshape(-1, 1)], axis=1)
    response = getattr(clf, response_method)(grid).reshape(*disp.response.shape)
    assert_allclose(response, disp.response)
    # One pcolormesh surface is drawn per class.
    assert len(disp.surface_) == len(clf.classes_)
    # Get which class has highest response and check it is plotted
    highest_class = np.argmax(response, axis=2)
    for idx, quadmesh in enumerate(disp.surface_):
        # Note quadmesh mask is True (i.e. masked) when `idx` is NOT the highest class
        assert_array_equal(
            highest_class != idx,
            quadmesh.get_array().mask.reshape(*highest_class.shape),
        )
@pytest.mark.parametrize(
    "multiclass_colors",
    [
        "plasma",
        "Blues",
        ["red", "green", "blue"],
    ],
)
@pytest.mark.parametrize("plot_method", ["contourf", "contour", "pcolormesh"])
def test_multiclass_colors_cmap(pyplot, plot_method, multiclass_colors):
    """Check correct cmap used for all `multiclass_colors` inputs."""
    import matplotlib as mpl
    if parse_version(mpl.__version__) < parse_version("3.5"):
        pytest.skip(
            "Matplotlib >= 3.5 is needed for `==` to check equivalence of colormaps"
        )
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    disp = DecisionBoundaryDisplay.from_estimator(
        clf,
        X,
        plot_method=plot_method,
        multiclass_colors=multiclass_colors,
    )
    # Reconstruct the per-class base colors the display is expected to use.
    if multiclass_colors == "plasma":
        # "plasma" is a ListedColormap: sampled colors are exposed via `.colors`.
        colors = mpl.pyplot.get_cmap(multiclass_colors, len(clf.classes_)).colors
    elif multiclass_colors == "Blues":
        # "Blues" is a LinearSegmentedColormap: sample it explicitly.
        cmap = mpl.pyplot.get_cmap(multiclass_colors, len(clf.classes_))
        colors = cmap(np.linspace(0, 1, len(clf.classes_)))
    else:
        colors = [mpl.colors.to_rgba(color) for color in multiclass_colors]
    if plot_method != "contour":
        # Filled plots use one white-to-color gradient colormap per class.
        cmaps = [
            mpl.colors.LinearSegmentedColormap.from_list(
                f"colormap_{class_idx}", [(1.0, 1.0, 1.0, 1.0), (r, g, b, 1.0)]
            )
            for class_idx, (r, g, b, _) in enumerate(colors)
        ]
        for idx, quad in enumerate(disp.surface_):
            assert quad.cmap == cmaps[idx]
    else:
        # Contour lines use the flat per-class colors directly.
        assert_allclose(disp.surface_.colors, colors)
def test_cmap_and_colors_logic(pyplot):
    """Check the handling logic for `cmap` and `colors`."""
    X, y = load_iris_2d_scaled()
    clf = LogisticRegression().fit(X, y)
    # `multiclass_colors` takes precedence: conflicting `cmap`/`colors`
    # arguments are ignored with a warning rather than raising.
    with pytest.warns(
        UserWarning,
        match="'cmap' is ignored in favor of 'multiclass_colors'",
    ):
        DecisionBoundaryDisplay.from_estimator(
            clf,
            X,
            multiclass_colors="plasma",
            cmap="Blues",
        )
    with pytest.warns(
        UserWarning,
        match="'colors' is ignored in favor of 'multiclass_colors'",
    ):
        DecisionBoundaryDisplay.from_estimator(
            clf,
            X,
            multiclass_colors="plasma",
            colors="blue",
        )
def test_subclass_named_constructors_return_type_is_subclass(pyplot):
    """Check that named constructors return the correct type when subclassed.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    classifier = LogisticRegression().fit(X, y)
    # A trivial subclass: `from_estimator` must construct instances of it,
    # not of the parent display class.
    class SubclassOfDisplay(DecisionBoundaryDisplay):
        pass
    display = SubclassOfDisplay.from_estimator(estimator=classifier, X=X)
    assert isinstance(display, SubclassOfDisplay)

View File

@@ -0,0 +1,47 @@
import numpy as np
import pytest
from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize(
    "feature_names, array_type, expected_feature_names",
    [
        (None, "array", ["x0", "x1", "x2"]),
        (None, "dataframe", ["a", "b", "c"]),
        (np.array(["a", "b", "c"]), "array", ["a", "b", "c"]),
    ],
)
def test_check_feature_names(feature_names, array_type, expected_feature_names):
    """`_check_feature_names` uses explicit names, dataframe columns, or
    generated "x<i>" names, in that order of preference."""
    data = _convert_container(
        np.random.randn(10, 3),
        constructor_name=array_type,
        columns_name=["a", "b", "c"],
    )
    assert _check_feature_names(data, feature_names) == expected_feature_names
def test_check_feature_names_error():
    """Duplicate feature names must be rejected with a ValueError."""
    data = np.random.randn(10, 3)
    duplicated_names = ["a", "b", "c", "a"]
    expected_msg = "feature_names should not contain duplicates."
    with pytest.raises(ValueError, match=expected_msg):
        _check_feature_names(data, duplicated_names)
@pytest.mark.parametrize("fx, idx", [(0, 0), (1, 1), ("a", 0), ("b", 1), ("c", 2)])
def test_get_feature_index(fx, idx):
    """`_get_feature_index` resolves positional and named features alike."""
    names = ["a", "b", "c"]
    assert _get_feature_index(fx, names) == idx
@pytest.mark.parametrize(
    "fx, feature_names, err_msg",
    [
        ("a", None, "Cannot plot partial dependence for feature 'a'"),
        ("d", ["a", "b", "c"], "Feature 'd' not in feature_names"),
    ],
)
def test_get_feature_names_error(fx, feature_names, err_msg):
    """Check the error paths of `_get_feature_index` for string features."""
    with pytest.raises(ValueError, match=err_msg):
        _get_feature_index(fx, feature_names)

View File

@@ -0,0 +1,540 @@
import numpy as np
import pytest
from joblib import parallel_backend
from numpy.testing import assert_allclose
from sklearn.compose import ColumnTransformer
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_regression,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
get_scorer,
mean_squared_error,
r2_score,
)
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale
from sklearn.utils._testing import _convert_container
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
@pytest.mark.parametrize("sample_weight", [None, "ones"])
def test_permutation_importance_correlated_feature_regression(
    n_jobs, max_samples, sample_weight
):
    # Make sure that feature highly correlated to the target have a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5
    X, y = load_diabetes(return_X_y=True)
    # Append an almost-perfect copy of the target as an extra feature.
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
    X = np.hstack([X, y_with_little_noise])
    # "ones" exercises the explicit-but-neutral sample_weight code path.
    weights = np.ones_like(y) if sample_weight == "ones" else sample_weight
    clf = RandomForestRegressor(n_estimators=10, random_state=42)
    clf.fit(X, y)
    result = permutation_importance(
        clf,
        X,
        y,
        sample_weight=weights,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_correlated_feature_regression_pandas(
    n_jobs, max_samples
):
    pd = pytest.importorskip("pandas")
    # Make sure that feature highly correlated to the target have a higher
    # importance
    rng = np.random.RandomState(42)
    n_repeats = 5
    dataset = load_iris()
    X, y = dataset.data, dataset.target
    y_with_little_noise = (y + rng.normal(scale=0.001, size=y.shape[0])).reshape(-1, 1)
    # Adds feature correlated with y as the last column
    X = pd.DataFrame(X, columns=dataset.feature_names)
    X["correlated_feature"] = y_with_little_noise
    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X, y)
    result = permutation_importance(
        clf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y was added as the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_robustness_to_high_cardinality_noisy_feature(n_jobs, max_samples, seed=42):
    # Permutation variable importance should not be affected by the high
    # cardinality bias of traditional feature importances, especially when
    # computed on a held-out test set:
    rng = np.random.RandomState(seed)
    n_repeats = 5
    n_samples = 1000
    n_classes = 5
    n_informative_features = 2
    n_noise_features = 1
    n_features = n_informative_features + n_noise_features
    # Generate a multiclass classification dataset and a set of informative
    # binary features that can be used to predict some classes of y exactly
    # while leaving some classes unexplained to make the problem harder.
    classes = np.arange(n_classes)
    y = rng.choice(classes, size=n_samples)
    X = np.hstack([(y == c).reshape(-1, 1) for c in classes[:n_informative_features]])
    X = X.astype(np.float32)
    # Not all target classes are explained by the binary class indicator
    # features:
    assert n_informative_features < n_classes
    # Add `n_noise_features` noisy features with high cardinality (numerical)
    # values that can be used to overfit the training data.
    X = np.concatenate([X, rng.randn(n_samples, n_noise_features)], axis=1)
    assert X.shape == (n_samples, n_features)
    # Split the dataset to be able to evaluate on a held-out test set. The
    # Test size should be large enough for importance measurements to be
    # stable:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, random_state=rng
    )
    clf = RandomForestClassifier(n_estimators=5, random_state=rng)
    clf.fit(X_train, y_train)
    # Variable importances computed by impurity decrease on the tree node
    # splits often use the noisy features in splits. This can give misleading
    # impression that high cardinality noisy variables are the most important:
    tree_importances = clf.feature_importances_
    informative_tree_importances = tree_importances[:n_informative_features]
    noisy_tree_importances = tree_importances[n_informative_features:]
    assert informative_tree_importances.max() < noisy_tree_importances.min()
    # Let's check that permutation-based feature importances do not have this
    # problem.
    r = permutation_importance(
        clf,
        X_test,
        y_test,
        n_repeats=n_repeats,
        random_state=rng,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert r.importances.shape == (X.shape[1], n_repeats)
    # Split the importances between informative and noisy features
    informative_importances = r.importances_mean[:n_informative_features]
    noisy_importances = r.importances_mean[n_informative_features:]
    # Because we do not have a binary variable explaining each target classes,
    # the RF model will have to use the random variable to make some
    # (overfitting) splits (as max_depth is not set). Therefore the noisy
    # variables will be non-zero but with small values oscillating around
    # zero:
    assert max(np.abs(noisy_importances)) > 1e-7
    assert noisy_importances.max() < 0.05
    # The binary features correlated with y should have a higher importance
    # than the high cardinality noisy features.
    # The maximum test accuracy is 2 / 5 == 0.4, each informative feature
    # contributing approximately a bit more than 0.2 of accuracy.
    assert informative_importances.min() > 0.15
def test_permutation_importance_mixed_types():
    """Check importances on data mixing NaN and numeric values, and that
    results depend on the random state."""
    rng = np.random.RandomState(42)
    n_repeats = 4
    # Last column is correlated with y
    X = np.array([[1.0, 2.0, 3.0, np.nan], [2, 1, 2, 1]]).T
    y = np.array([0, 1, 0, 1])
    clf = make_pipeline(SimpleImputer(), LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
    # use another random state
    rng = np.random.RandomState(0)
    result2 = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result2.importances.shape == (X.shape[1], n_repeats)
    # Different seeds yield different permutations, hence different raw values.
    assert not np.allclose(result.importances, result2.importances)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result2.importances_mean[-1] > result2.importances_mean[:-1])
def test_permutation_importance_mixed_types_pandas():
    """Check importances on a dataframe mixing numeric and categorical columns."""
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(42)
    n_repeats = 5
    # Last column is correlated with y
    X = pd.DataFrame({"col1": [1.0, 2.0, 3.0, np.nan], "col2": ["a", "b", "a", "b"]})
    y = np.array([0, 1, 0, 1])
    num_preprocess = make_pipeline(SimpleImputer(), StandardScaler())
    preprocess = ColumnTransformer(
        [("num", num_preprocess, ["col1"]), ("cat", OneHotEncoder(), ["col2"])]
    )
    clf = make_pipeline(preprocess, LogisticRegression(solver="lbfgs"))
    clf.fit(X, y)
    result = permutation_importance(clf, X, y, n_repeats=n_repeats, random_state=rng)
    assert result.importances.shape == (X.shape[1], n_repeats)
    # the correlated feature with y is the last column and should
    # have the highest importance
    assert np.all(result.importances_mean[-1] > result.importances_mean[:-1])
def test_permutation_importance_linear_regresssion():
    """Compare permutation importances of a linear model on standardized data
    against their closed-form expectation (2 * coef**2 under the MSE score)."""
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    X = scale(X)
    y = scale(y)
    lr = LinearRegression().fit(X, y)
    # this relationship can be computed in closed form
    expected_importances = 2 * lr.coef_**2
    results = permutation_importance(
        lr, X, y, n_repeats=50, scoring="neg_mean_squared_error"
    )
    assert_allclose(
        expected_importances, results.importances_mean, rtol=1e-1, atol=1e-6
    )
@pytest.mark.parametrize("max_samples", [500, 1.0])
def test_permutation_importance_equivalence_sequential_parallel(max_samples):
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    # Also tests that max_samples equal to number of samples is equivalent to 1.0
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    lr = LinearRegression().fit(X, y)
    importance_sequential = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=1, max_samples=max_samples
    )
    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_sequential["importances"].min()
    imp_max = importance_sequential["importances"].max()
    assert imp_max - imp_min > 0.3
    # Then actually check that parallelism does not impact the results
    # either with shared memory (threading) or without isolated memory
    # via process-based parallelism using the default backend
    # ('loky' or 'multiprocessing') depending on the joblib version:
    # process-based parallelism (by default):
    importance_processes = permutation_importance(
        lr, X, y, n_repeats=5, random_state=0, n_jobs=2
    )
    assert_allclose(
        importance_processes["importances"], importance_sequential["importances"]
    )
    # thread-based parallelism:
    with parallel_backend("threading"):
        importance_threading = permutation_importance(
            lr, X, y, n_repeats=5, random_state=0, n_jobs=2
        )
    assert_allclose(
        importance_threading["importances"], importance_sequential["importances"]
    )
@pytest.mark.parametrize("n_jobs", [None, 1, 2])
@pytest.mark.parametrize("max_samples", [0.5, 1.0])
def test_permutation_importance_equivalence_array_dataframe(n_jobs, max_samples):
    # This test checks that the column shuffling logic has the same behavior
    # both a dataframe and a simple numpy array.
    pd = pytest.importorskip("pandas")
    # regression test to make sure that sequential and parallel calls will
    # output the same results.
    X, y = make_regression(n_samples=100, n_features=5, random_state=0)
    X_df = pd.DataFrame(X)
    # Add a categorical feature that is statistically linked to y:
    binner = KBinsDiscretizer(
        n_bins=3,
        encode="ordinal",
        quantile_method="averaged_inverted_cdf",
    )
    cat_column = binner.fit_transform(y.reshape(-1, 1))
    # Concatenate the extra column to the numpy array: integers will be
    # cast to float values
    X = np.hstack([X, cat_column])
    assert X.dtype.kind == "f"
    # Insert extra column as a non-numpy-native dtype:
    cat_column = pd.Categorical(cat_column.ravel())
    new_col_idx = len(X_df.columns)
    X_df[new_col_idx] = cat_column
    assert X_df[new_col_idx].dtype == cat_column.dtype
    # Stitch an arbitrary index to the dataframe:
    X_df.index = np.arange(len(X_df)).astype(str)
    rf = RandomForestRegressor(n_estimators=5, max_depth=3, random_state=0)
    rf.fit(X, y)
    n_repeats = 3
    importance_array = permutation_importance(
        rf,
        X,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    # First check that the problem is structured enough and that the model is
    # complex enough to not yield trivial, constant importances:
    imp_min = importance_array["importances"].min()
    imp_max = importance_array["importances"].max()
    assert imp_max - imp_min > 0.3
    # Now check that importances computed on dataframe match the values
    # of those computed on the array with the same data.
    importance_dataframe = permutation_importance(
        rf,
        X_df,
        y,
        n_repeats=n_repeats,
        random_state=0,
        n_jobs=n_jobs,
        max_samples=max_samples,
    )
    assert_allclose(
        importance_array["importances"], importance_dataframe["importances"]
    )
@pytest.mark.parametrize("input_type", ["array", "dataframe"])
def test_permutation_importance_large_memmaped_data(input_type):
    # Smoke, non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/15810
    n_samples, n_features = int(5e4), 4
    X, y = make_classification(
        n_samples=n_samples, n_features=n_features, random_state=0
    )
    assert X.nbytes > 1e6  # trigger joblib memmapping
    X = _convert_container(X, input_type)
    clf = DummyClassifier(strategy="prior").fit(X, y)
    # Actual smoke test: should not raise any error:
    n_repeats = 5
    r = permutation_importance(clf, X, y, n_repeats=n_repeats, n_jobs=2)
    # Auxiliary check: DummyClassifier is feature independent:
    # permuting features should not change the predictions
    expected_importances = np.zeros((n_features, n_repeats))
    assert_allclose(expected_importances, r.importances)
def test_permutation_importance_sample_weight():
    # Creating data with 2 features and 1000 samples, where the target
    # variable is a linear combination of the two features, such that
    # in half of the samples the impact of feature 1 is twice the impact of
    # feature 2, and vice versa on the other half of the samples.
    rng = np.random.RandomState(1)
    n_samples = 1000
    n_features = 2
    n_half_samples = n_samples // 2
    x = rng.normal(0.0, 0.001, (n_samples, n_features))
    y = np.zeros(n_samples)
    y[:n_half_samples] = 2 * x[:n_half_samples, 0] + x[:n_half_samples, 1]
    y[n_half_samples:] = x[n_half_samples:, 0] + 2 * x[n_half_samples:, 1]
    # Fitting linear regression with perfect prediction
    lr = LinearRegression(fit_intercept=False)
    lr.fit(x, y)
    # When all samples are weighted with the same weights, the ratio of
    # the two features importance should equal to 1 on expectation (when using
    # mean absolute error as the loss function).
    pi = permutation_importance(
        lr, x, y, random_state=1, scoring="neg_mean_absolute_error", n_repeats=200
    )
    x1_x2_imp_ratio_w_none = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_none == pytest.approx(1, 0.01)
    # When passing a vector of ones as the sample_weight, results should be
    # the same as in the case that sample_weight=None.
    w = np.ones(n_samples)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w_ones = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w_ones == pytest.approx(x1_x2_imp_ratio_w_none, 0.01)
    # When the ratio between the weights of the first half of the samples and
    # the second half of the samples approaches to infinity, the ratio of
    # the two features importance should equal to 2 on expectation (when using
    # mean absolute error as the loss function).
    w = np.hstack([np.repeat(10.0**10, n_half_samples), np.repeat(1.0, n_half_samples)])
    lr.fit(x, y, w)
    pi = permutation_importance(
        lr,
        x,
        y,
        random_state=1,
        scoring="neg_mean_absolute_error",
        n_repeats=200,
        sample_weight=w,
    )
    x1_x2_imp_ratio_w = pi.importances_mean[0] / pi.importances_mean[1]
    assert x1_x2_imp_ratio_w / x1_x2_imp_ratio_w_none == pytest.approx(2, 0.01)
def test_permutation_importance_no_weights_scoring_function():
    """Check `permutation_importance` with a scorer lacking `sample_weight`.

    When ``sample_weight`` is None, such a scorer must be accepted without
    error; when a ``sample_weight`` is provided, forwarding it to the scorer
    must raise a ``TypeError``.
    """

    # Scorer deliberately missing the ``sample_weight`` keyword.
    def my_scorer(estimator, X, y):
        return 1

    # Minimal fitted estimator and data for the permutation test.
    x = np.array([[1, 2], [3, 4]])
    y = np.array([1, 2])
    w = np.array([1, 1])
    lr = LinearRegression()
    lr.fit(x, y)

    # sample_weight=None: the scorer is never handed weights, so no error.
    try:
        permutation_importance(lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1)
    except TypeError:
        # Fixed message: the function under test is permutation_importance,
        # not "permutation_test".
        pytest.fail(
            "permutation_importance raised an error when using a scorer "
            "function that does not accept sample_weight even though "
            "sample_weight was None"
        )

    # sample_weight is not None: forwarding it to the scorer must fail.
    with pytest.raises(TypeError):
        permutation_importance(
            lr, x, y, random_state=1, scoring=my_scorer, n_repeats=1, sample_weight=w
        )
@pytest.mark.parametrize(
    "list_single_scorer, multi_scorer",
    [
        (["r2", "neg_mean_squared_error"], ["r2", "neg_mean_squared_error"]),
        (
            ["r2", "neg_mean_squared_error"],
            {
                "r2": get_scorer("r2"),
                "neg_mean_squared_error": get_scorer("neg_mean_squared_error"),
            },
        ),
        (
            ["r2", "neg_mean_squared_error"],
            lambda estimator, X, y: {
                "r2": r2_score(y, estimator.predict(X)),
                "neg_mean_squared_error": -mean_squared_error(y, estimator.predict(X)),
            },
        ),
    ],
)
def test_permutation_importance_multi_metric(list_single_scorer, multi_scorer):
    """Multi-metric scoring must match the equivalent single-metric runs."""
    X, y = make_regression(n_samples=500, n_features=10, random_state=0)
    estimator = LinearRegression().fit(X, y)

    combined = permutation_importance(
        estimator, X, y, random_state=1, scoring=multi_scorer, n_repeats=2
    )
    # One result Bunch per requested metric, keyed by scorer name.
    assert set(combined.keys()) == set(list_single_scorer)

    # Each per-metric result must be identical to a single-scorer run with
    # the same random_state.
    for metric_name in list_single_scorer:
        reference = permutation_importance(
            estimator, X, y, random_state=1, scoring=metric_name, n_repeats=2
        )
        assert_allclose(combined[metric_name].importances, reference.importances)
def test_permutation_importance_max_samples_error():
    """Check that a proper error message is raised when `max_samples` is not
    set to a valid input value.
    """
    X = np.array([(1.0, 2.0, 3.0, 4.0)]).T
    y = np.array([0, 1, 0, 1])
    estimator = LogisticRegression().fit(X, y)

    # Requesting more permuted samples (5) than rows in X (4) is invalid.
    expected_message = r"max_samples must be <= n_samples"
    with pytest.raises(ValueError, match=expected_message):
        permutation_importance(estimator, X, y, max_samples=5)