Add README

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,63 @@
"""Methods for scaling, centering, normalization, binarization, and more."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from ._data import (
Binarizer,
KernelCenterer,
MaxAbsScaler,
MinMaxScaler,
Normalizer,
PowerTransformer,
QuantileTransformer,
RobustScaler,
StandardScaler,
add_dummy_feature,
binarize,
maxabs_scale,
minmax_scale,
normalize,
power_transform,
quantile_transform,
robust_scale,
scale,
)
from ._discretization import KBinsDiscretizer
from ._encoders import OneHotEncoder, OrdinalEncoder
from ._function_transformer import FunctionTransformer
from ._label import LabelBinarizer, LabelEncoder, MultiLabelBinarizer, label_binarize
from ._polynomial import PolynomialFeatures, SplineTransformer
from ._target_encoder import TargetEncoder
# Explicit public API of ``sklearn.preprocessing``.
# Entries are kept in ASCII-sorted order (classes before functions).
__all__ = [
    "Binarizer",
    "FunctionTransformer",
    "KBinsDiscretizer",
    "KernelCenterer",
    "LabelBinarizer",
    "LabelEncoder",
    "MaxAbsScaler",
    "MinMaxScaler",
    "MultiLabelBinarizer",
    "Normalizer",
    "OneHotEncoder",
    "OrdinalEncoder",
    "PolynomialFeatures",
    "PowerTransformer",
    "QuantileTransformer",
    "RobustScaler",
    "SplineTransformer",
    "StandardScaler",
    "TargetEncoder",
    "add_dummy_feature",
    "binarize",
    "label_binarize",
    "maxabs_scale",
    "minmax_scale",
    "normalize",
    "power_transform",
    "quantile_transform",
    "robust_scale",
    "scale",
]

View File

@@ -0,0 +1,258 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from ..utils._typedefs cimport uint8_t, int64_t, intp_t
# Compact boolean-like flag type (e.g. for ``interaction_only``).
ctypedef uint8_t FLAG_t

# We use the following verbatim block to determine whether the current
# platform's compiler supports 128-bit integer values intrinsically.
# This should work for GCC and CLANG on 64-bit architectures, but doesn't for
# MSVC on any architecture. We prefer to use 128-bit integers when possible
# because the intermediate calculations have a non-trivial risk of overflow. It
# is, however, very unlikely to come up on an average use case, hence 64-bit
# integers (i.e. `long long`) are "good enough" for most common cases. There is
# not much we can do to efficiently mitigate the overflow risk on the Windows
# platform at this time. Consider this a "best effort" design decision that
# could be revisited later in case someone comes up with a safer option that
# does not hurt the performance of the common cases.
# See `test_sizeof_LARGEST_INT_t()`for more information on exact type expectations.
cdef extern from *:
    """
    #ifdef __SIZEOF_INT128__
        typedef __int128 LARGEST_INT_t;
    #elif (__clang__ || __EMSCRIPTEN__) && !__i386__
        typedef _BitInt(128) LARGEST_INT_t;
    #else
        typedef long long LARGEST_INT_t;
    #endif
    """
    # Cython-side declaration; the actual width is selected by the C
    # preprocessor block above (128-bit when available, else 64-bit).
    ctypedef long long LARGEST_INT_t

# Determine the size of `LARGEST_INT_t` at runtime.
# Used in `test_sizeof_LARGEST_INT_t`.
def _get_sizeof_LARGEST_INT_t():
    """Return ``sizeof(LARGEST_INT_t)`` as selected at compile time."""
    return sizeof(LARGEST_INT_t)
# TODO: use `{int,float}{32,64}_t` when cython#5230 is resolved:
# https://github.com/cython/cython/issues/5230
# Numeric dtypes accepted for the CSR "data" array.
ctypedef fused DATA_t:
    float
    double
    int
    long long

# INDEX_{A,B}_t are defined to generate a proper Cartesian product
# of types through Cython fused-type expansion.
# A: input index dtype, B: output index dtype.
ctypedef fused INDEX_A_t:
    signed int
    signed long long
ctypedef fused INDEX_B_t:
    signed int
    signed long long
cdef inline int64_t _deg2_column(
    LARGEST_INT_t n_features,
    LARGEST_INT_t i,
    LARGEST_INT_t j,
    FLAG_t interaction_only
) nogil:
    """Compute the index of the column for a degree 2 expansion

    n_features is the dimensionality of the input data, i and j are the indices
    for the columns involved in the expansion.
    """
    # NOTE: `/` on C integer operands is C (truncating) division; the operands
    # here are non-negative so it coincides with floor division. Intermediate
    # products are computed in LARGEST_INT_t to reduce overflow risk.
    if interaction_only:
        return n_features * i - i * (i + 3) / 2 - 1 + j
    else:
        return n_features * i - i * (i + 1) / 2 + j
cdef inline int64_t _deg3_column(
    LARGEST_INT_t n_features,
    LARGEST_INT_t i,
    LARGEST_INT_t j,
    LARGEST_INT_t k,
    FLAG_t interaction_only
) nogil:
    """Compute the index of the column for a degree 3 expansion

    n_features is the dimensionality of the input data, i, j and k are the indices
    for the columns involved in the expansion.
    """
    # As in `_deg2_column`, `/` is C integer division on non-negative values.
    if interaction_only:
        return (
            (
                (3 * n_features) * (n_features * i - i**2)
                + i * (i**2 + 11) - (3 * j) * (j + 3)
            ) / 6 + i**2 + n_features * (j - 1 - 2 * i) + k
        )
    else:
        return (
            (
                (3 * n_features) * (n_features * i - i**2)
                + i ** 3 - i - (3 * j) * (j + 1)
            ) / 6 + n_features * j + k
        )
def py_calc_expanded_nnz_deg2(n, interaction_only):
    """Number of degree-2 expansion terms for ``n`` non-zero entries.

    Pure-Python fallback: relies on arbitrary-precision integers, so it
    cannot overflow. ``interaction_only`` (0 or 1) removes the ``n``
    squared terms from the count.
    """
    pairs_with_repetition = n * (n + 1) // 2
    return pairs_with_repetition - interaction_only * n
def py_calc_expanded_nnz_deg3(n, interaction_only):
    """Number of degree-3 expansion terms for ``n`` non-zero entries.

    Pure-Python fallback: relies on arbitrary-precision integers, so it
    cannot overflow. ``interaction_only`` (0 or 1) removes the ``n**2``
    terms containing a repeated index from the count.
    """
    triples_with_repetition = n * (n**2 + 3 * n + 2) // 6
    return triples_with_repetition - interaction_only * n**2
cpdef int64_t _calc_expanded_nnz(
    LARGEST_INT_t n,
    FLAG_t interaction_only,
    LARGEST_INT_t degree
):
    """
    Calculates the number of non-zero interaction terms generated by the
    non-zero elements of a single row.
    """
    # This is the maximum value before the intermediate computation
    # d**2 + d overflows
    # Solution to d**2 + d = maxint64
    # SymPy: solve(x**2 + x - int64_max, x)
    cdef int64_t MAX_SAFE_INDEX_CALC_DEG2 = 3037000499
    # This is the maximum value before the intermediate computation
    # d**3 + 3 * d**2 + 2*d overflows
    # Solution to d**3 + 3 * d**2 + 2*d = maxint64
    # SymPy: solve(x * (x**2 + 3 * x + 2) - int64_max, x)
    cdef int64_t MAX_SAFE_INDEX_CALC_DEG3 = 2097151
    # NOTE(review): when 128-bit integers are available (sizeof == 16) both
    # branches below fall through to the Python big-int fallback rather than
    # the fast C path — presumably intentional/safe, but confirm against the
    # "Only need to check when not using 128-bit integers" comments.
    if degree == 2:
        # Only need to check when not using 128-bit integers
        if sizeof(LARGEST_INT_t) < 16 and n <= MAX_SAFE_INDEX_CALC_DEG2:
            return n * (n + 1) / 2 - interaction_only * n
        # Python integers cannot overflow; cast the exact result back to int64.
        return <int64_t> py_calc_expanded_nnz_deg2(n, interaction_only)
    else:
        # Only need to check when not using 128-bit integers
        if sizeof(LARGEST_INT_t) < 16 and n <= MAX_SAFE_INDEX_CALC_DEG3:
            return n * (n**2 + 3 * n + 2) / 6 - interaction_only * n**2
        return <int64_t> py_calc_expanded_nnz_deg3(n, interaction_only)
cpdef int64_t _calc_total_nnz(
    INDEX_A_t[:] indptr,
    FLAG_t interaction_only,
    int64_t degree,
):
    """
    Calculates the number of non-zero interaction terms generated by the
    non-zero elements across all rows for a single degree.
    """
    cdef int64_t total_nnz=0
    cdef intp_t row_idx
    # `indptr[r + 1] - indptr[r]` is the number of stored (non-zero) entries
    # in CSR row `r`; sum the per-row expansion sizes.
    for row_idx in range(len(indptr) - 1):
        total_nnz += _calc_expanded_nnz(
            indptr[row_idx + 1] - indptr[row_idx],
            interaction_only,
            degree
        )
    return total_nnz
cpdef void _csr_polynomial_expansion(
    const DATA_t[:] data,            # IN READ-ONLY
    const INDEX_A_t[:] indices,      # IN READ-ONLY
    const INDEX_A_t[:] indptr,       # IN READ-ONLY
    INDEX_A_t n_features,
    DATA_t[:] result_data,           # OUT
    INDEX_B_t[:] result_indices,     # OUT
    INDEX_B_t[:] result_indptr,      # OUT
    FLAG_t interaction_only,
    FLAG_t degree
):
    """
    Perform a second or third degree polynomial or interaction expansion on a
    compressed sparse row (CSR) matrix. The method used only takes products of
    non-zero features. For a matrix with density :math:`d`, this results in a
    speedup on the order of :math:`(1/d)^k` where :math:`k` is the degree of
    the expansion, assuming all rows are of similar density.

    The output buffers must be pre-allocated by the caller (sized via
    `_calc_total_nnz`); this routine only fills them in.

    Parameters
    ----------
    data : memory view on nd-array
        The "data" attribute of the input CSR matrix.

    indices : memory view on nd-array
        The "indices" attribute of the input CSR matrix.

    indptr : memory view on nd-array
        The "indptr" attribute of the input CSR matrix.

    n_features : int
        The dimensionality of the input CSR matrix.

    result_data : nd-array
        The output CSR matrix's "data" attribute.
        It is modified by this routine.

    result_indices : nd-array
        The output CSR matrix's "indices" attribute.
        It is modified by this routine.

    result_indptr : nd-array
        The output CSR matrix's "indptr" attribute.
        It is modified by this routine.

    interaction_only : int
        0 for a polynomial expansion, 1 for an interaction expansion.

    degree : int
        The degree of the expansion. This must be either 2 or 3.

    References
    ----------
    "Leveraging Sparsity to Speed Up Polynomial Feature Expansions of CSR
    Matrices Using K-Simplex Numbers" by Andrew Nystrom and John Hughes.
    """
    # Make the arrays that will form the CSR matrix of the expansion.
    cdef INDEX_A_t row_i, row_starts, row_ends, i, j, k, i_ptr, j_ptr, k_ptr

    cdef INDEX_B_t expanded_index=0, num_cols_in_row, col

    with nogil:
        result_indptr[0] = indptr[0]
        for row_i in range(indptr.shape[0]-1):
            row_starts = indptr[row_i]
            row_ends = indptr[row_i + 1]
            num_cols_in_row = 0
            for i_ptr in range(row_starts, row_ends):
                i = indices[i_ptr]
                # Starting at `i_ptr + interaction_only` skips the i == j
                # (squared) terms when interaction_only is 1.
                for j_ptr in range(i_ptr + interaction_only, row_ends):
                    j = indices[j_ptr]
                    if degree == 2:
                        col = <INDEX_B_t> _deg2_column(
                            n_features,
                            i, j,
                            interaction_only
                        )
                        result_indices[expanded_index] = col
                        result_data[expanded_index] = (
                            data[i_ptr] * data[j_ptr]
                        )
                        expanded_index += 1
                        num_cols_in_row += 1
                    else:
                        # degree == 3
                        for k_ptr in range(j_ptr + interaction_only, row_ends):
                            k = indices[k_ptr]
                            col = <INDEX_B_t> _deg3_column(
                                n_features,
                                i, j, k,
                                interaction_only
                            )
                            result_indices[expanded_index] = col
                            result_data[expanded_index] = (
                                data[i_ptr] * data[j_ptr] * data[k_ptr]
                            )
                            expanded_index += 1
                            num_cols_in_row += 1
            # Standard CSR bookkeeping: cumulative count of stored entries.
            result_indptr[row_i+1] = result_indptr[row_i] + num_cols_in_row
    return

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,548 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from numbers import Integral
import numpy as np
from ..base import BaseEstimator, TransformerMixin, _fit_context
from ..utils import resample
from ..utils._param_validation import Interval, Options, StrOptions
from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
from ..utils.validation import (
_check_feature_names_in,
_check_sample_weight,
check_array,
check_is_fitted,
validate_data,
)
from ._encoders import OneHotEncoder
class KBinsDiscretizer(TransformerMixin, BaseEstimator):
    """
    Bin continuous data into intervals.

    Read more in the :ref:`User Guide <preprocessing_discretization>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    n_bins : int or array-like of shape (n_features,), default=5
        The number of bins to produce. Raises ValueError if ``n_bins < 2``.

    encode : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
        Method used to encode the transformed result.

        - 'onehot': Encode the transformed result with one-hot encoding
          and return a sparse matrix. Ignored features are always
          stacked to the right.
        - 'onehot-dense': Encode the transformed result with one-hot encoding
          and return a dense array. Ignored features are always
          stacked to the right.
        - 'ordinal': Return the bin identifier encoded as an integer value.

    strategy : {'uniform', 'quantile', 'kmeans'}, default='quantile'
        Strategy used to define the widths of the bins.

        - 'uniform': All bins in each feature have identical widths.
        - 'quantile': All bins in each feature have the same number of points.
        - 'kmeans': Values in each bin have the same nearest center of a 1D
          k-means cluster.

        For an example of the different strategies see:
        :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py`.

    quantile_method : {"inverted_cdf", "averaged_inverted_cdf",
            "closest_observation", "interpolated_inverted_cdf", "hazen",
            "weibull", "linear", "median_unbiased", "normal_unbiased"},
            default="linear"
        Method to pass on to np.percentile calculation when using
        strategy="quantile". Only `averaged_inverted_cdf` and `inverted_cdf`
        support the use of `sample_weight != None` when subsampling is not
        active.

        .. versionadded:: 1.7

    dtype : {np.float32, np.float64}, default=None
        The desired data-type for the output. If None, output dtype is
        consistent with input dtype. Only np.float32 and np.float64 are
        supported.

        .. versionadded:: 0.24

    subsample : int or None, default=200_000
        Maximum number of samples, used to fit the model, for computational
        efficiency.
        `subsample=None` means that all the training samples are used when
        computing the quantiles that determine the binning thresholds.
        Since quantile computation relies on sorting each column of `X` and
        that sorting has an `n log(n)` time complexity,
        it is recommended to use subsampling on datasets with a
        very large number of samples.

        .. versionchanged:: 1.3
            The default value of `subsample` changed from `None` to `200_000` when
            `strategy="quantile"`.

        .. versionchanged:: 1.5
            The default value of `subsample` changed from `None` to `200_000` when
            `strategy="uniform"` or `strategy="kmeans"`.

    random_state : int, RandomState instance or None, default=None
        Determines random number generation for subsampling.
        Pass an int for reproducible results across multiple function calls.
        See the `subsample` parameter for more details.
        See :term:`Glossary <random_state>`.

        .. versionadded:: 1.1

    Attributes
    ----------
    bin_edges_ : ndarray of ndarray of shape (n_features,)
        The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``
        Ignored features will have empty arrays.

    n_bins_ : ndarray of shape (n_features,), dtype=np.int64
        Number of bins per feature. Bins whose width are too small
        (i.e., <= 1e-8) are removed with a warning.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    Binarizer : Class used to bin values as ``0`` or
        ``1`` based on a parameter ``threshold``.

    Notes
    -----
    For a visualization of discretization on different datasets refer to
    :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py`.
    On the effect of discretization on linear models see:
    :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py`.

    In bin edges for feature ``i``, the first and last values are used only for
    ``inverse_transform``. During transform, bin edges are extended to::

      np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])

    You can combine ``KBinsDiscretizer`` with
    :class:`~sklearn.compose.ColumnTransformer` if you only want to preprocess
    part of the features.

    ``KBinsDiscretizer`` might produce constant features (e.g., when
    ``encode = 'onehot'`` and certain bins do not contain any data).
    These features can be removed with feature selection algorithms
    (e.g., :class:`~sklearn.feature_selection.VarianceThreshold`).

    Examples
    --------
    >>> from sklearn.preprocessing import KBinsDiscretizer
    >>> X = [[-2, 1, -4, -1],
    ...      [-1, 2, -3, -0.5],
    ...      [ 0, 3, -2, 0.5],
    ...      [ 1, 4, -1, 2]]
    >>> est = KBinsDiscretizer(
    ...     n_bins=3, encode='ordinal', strategy='uniform'
    ... )
    >>> est.fit(X)
    KBinsDiscretizer(...)
    >>> Xt = est.transform(X)
    >>> Xt  # doctest: +SKIP
    array([[ 0., 0., 0., 0.],
           [ 1., 1., 1., 0.],
           [ 2., 2., 2., 1.],
           [ 2., 2., 2., 2.]])

    Sometimes it may be useful to convert the data back into the original
    feature space. The ``inverse_transform`` function converts the binned
    data into the original feature space. Each value will be equal to the mean
    of the two bin edges.

    >>> est.bin_edges_[0]
    array([-2., -1., 0., 1.])
    >>> est.inverse_transform(Xt)
    array([[-1.5, 1.5, -3.5, -0.5],
           [-0.5, 2.5, -2.5, -0.5],
           [ 0.5, 3.5, -1.5, 0.5],
           [ 0.5, 3.5, -1.5, 1.5]])
    """

    # Declarative parameter validation consumed by `_fit_context`.
    _parameter_constraints: dict = {
        "n_bins": [Interval(Integral, 2, None, closed="left"), "array-like"],
        "encode": [StrOptions({"onehot", "onehot-dense", "ordinal"})],
        "strategy": [StrOptions({"uniform", "quantile", "kmeans"})],
        "quantile_method": [
            StrOptions(
                {
                    "warn",
                    "inverted_cdf",
                    "averaged_inverted_cdf",
                    "closest_observation",
                    "interpolated_inverted_cdf",
                    "hazen",
                    "weibull",
                    "linear",
                    "median_unbiased",
                    "normal_unbiased",
                }
            )
        ],
        "dtype": [Options(type, {np.float64, np.float32}), None],
        "subsample": [Interval(Integral, 1, None, closed="left"), None],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        n_bins=5,
        *,
        encode="onehot",
        strategy="quantile",
        quantile_method="warn",
        dtype=None,
        subsample=200_000,
        random_state=None,
    ):
        # Store parameters unmodified (scikit-learn convention: validation
        # is deferred to `fit`).
        self.n_bins = n_bins
        self.encode = encode
        self.strategy = strategy
        self.quantile_method = quantile_method
        self.dtype = dtype
        self.subsample = subsample
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, sample_weight=None):
        """
        Fit the estimator.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to be discretized.

        y : None
            Ignored. This parameter exists only for compatibility with
            :class:`~sklearn.pipeline.Pipeline`.

        sample_weight : ndarray of shape (n_samples,)
            Contains weight values to be associated with each sample.

            .. versionadded:: 1.3

            .. versionchanged:: 1.7
                Added support for strategy="uniform".

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        X = validate_data(self, X, dtype="numeric")

        if self.dtype in (np.float64, np.float32):
            output_dtype = self.dtype
        else:  # self.dtype is None
            output_dtype = X.dtype

        n_samples, n_features = X.shape

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        if self.subsample is not None and n_samples > self.subsample:
            # Take a subsample of `X`
            # When resampling, it is important to subsample **with replacement** to
            # preserve the distribution, in particular in the presence of a few data
            # points with large weights. You can check this by setting `replace=False`
            # in sklearn.utils.test.test_indexing.test_resample_weighted and check that
            # it fails as a justification for this claim.
            X = resample(
                X,
                replace=True,
                n_samples=self.subsample,
                random_state=self.random_state,
                sample_weight=sample_weight,
            )
            # Since we already used the weights when resampling when provided,
            # we set them back to `None` to avoid accounting for the weights twice
            # in subsequent operations to compute weight-aware bin edges with
            # quantiles or k-means.
            sample_weight = None

        # Re-read: resampling above may have replaced `X`.
        n_features = X.shape[1]
        n_bins = self._validate_n_bins(n_features)

        # One edge array per feature; lengths may differ, hence dtype=object.
        bin_edges = np.zeros(n_features, dtype=object)

        # TODO(1.9): remove and switch to quantile_method="averaged_inverted_cdf"
        # by default.
        quantile_method = self.quantile_method
        if self.strategy == "quantile" and quantile_method == "warn":
            warnings.warn(
                "The current default behavior, quantile_method='linear', will be "
                "changed to quantile_method='averaged_inverted_cdf' in "
                "scikit-learn version 1.9 to naturally support sample weight "
                "equivalence properties by default. Pass "
                "quantile_method='averaged_inverted_cdf' explicitly to silence this "
                "warning.",
                FutureWarning,
            )
            quantile_method = "linear"

        if (
            self.strategy == "quantile"
            and quantile_method not in ["inverted_cdf", "averaged_inverted_cdf"]
            and sample_weight is not None
        ):
            raise ValueError(
                "When fitting with strategy='quantile' and sample weights, "
                "quantile_method should either be set to 'averaged_inverted_cdf' or "
                f"'inverted_cdf', got quantile_method='{quantile_method}' instead."
            )

        if self.strategy != "quantile" and sample_weight is not None:
            # Prepare a mask to filter out zero-weight samples when extracting
            # the min and max values of each columns which are needed for the
            # "uniform" and "kmeans" strategies.
            nnz_weight_mask = sample_weight != 0
        else:
            # Otherwise, all samples are used. Use a slice to avoid creating a
            # new array.
            nnz_weight_mask = slice(None)

        for jj in range(n_features):
            column = X[:, jj]
            col_min = column[nnz_weight_mask].min()
            col_max = column[nnz_weight_mask].max()

            if col_min == col_max:
                warnings.warn(
                    "Feature %d is constant and will be replaced with 0." % jj
                )
                n_bins[jj] = 1
                bin_edges[jj] = np.array([-np.inf, np.inf])
                continue

            if self.strategy == "uniform":
                bin_edges[jj] = np.linspace(col_min, col_max, n_bins[jj] + 1)

            elif self.strategy == "quantile":
                percentile_levels = np.linspace(0, 100, n_bins[jj] + 1)

                # method="linear" is the implicit default for any numpy
                # version. So we keep it version independent in that case by
                # using an empty param dict.
                percentile_kwargs = {}
                if quantile_method != "linear" and sample_weight is None:
                    percentile_kwargs["method"] = quantile_method

                if sample_weight is None:
                    bin_edges[jj] = np.asarray(
                        np.percentile(column, percentile_levels, **percentile_kwargs),
                        dtype=np.float64,
                    )
                else:
                    # TODO: make _weighted_percentile and
                    # _averaged_weighted_percentile accept an array of
                    # quantiles instead of calling it multiple times and
                    # sorting the column multiple times as a result.
                    percentile_func = {
                        "inverted_cdf": _weighted_percentile,
                        "averaged_inverted_cdf": _averaged_weighted_percentile,
                    }[quantile_method]
                    bin_edges[jj] = np.asarray(
                        [
                            percentile_func(column, sample_weight, percentile_rank=p)
                            for p in percentile_levels
                        ],
                        dtype=np.float64,
                    )

            elif self.strategy == "kmeans":
                from ..cluster import KMeans  # fixes import loops

                # Deterministic initialization with uniform spacing
                uniform_edges = np.linspace(col_min, col_max, n_bins[jj] + 1)
                init = (uniform_edges[1:] + uniform_edges[:-1])[:, None] * 0.5

                # 1D k-means procedure
                km = KMeans(n_clusters=n_bins[jj], init=init, n_init=1)
                centers = km.fit(
                    column[:, None], sample_weight=sample_weight
                ).cluster_centers_[:, 0]
                # Must sort, centers may be unsorted even with sorted init
                centers.sort()
                # Edges are midpoints between consecutive centers, padded
                # with the column min/max.
                bin_edges[jj] = (centers[1:] + centers[:-1]) * 0.5
                bin_edges[jj] = np.r_[col_min, bin_edges[jj], col_max]

            # Remove bins whose width are too small (i.e., <= 1e-8)
            if self.strategy in ("quantile", "kmeans"):
                mask = np.ediff1d(bin_edges[jj], to_begin=np.inf) > 1e-8
                bin_edges[jj] = bin_edges[jj][mask]
                if len(bin_edges[jj]) - 1 != n_bins[jj]:
                    warnings.warn(
                        "Bins whose width are too small (i.e., <= "
                        "1e-8) in feature %d are removed. Consider "
                        "decreasing the number of bins." % jj
                    )
                    n_bins[jj] = len(bin_edges[jj]) - 1

        self.bin_edges_ = bin_edges
        self.n_bins_ = n_bins

        if "onehot" in self.encode:
            self._encoder = OneHotEncoder(
                categories=[np.arange(i) for i in self.n_bins_],
                sparse_output=self.encode == "onehot",
                dtype=output_dtype,
            )
            # Fit the OneHotEncoder with toy datasets
            # so that it's ready for use after the KBinsDiscretizer is fitted
            self._encoder.fit(np.zeros((1, len(self.n_bins_))))

        return self

    def _validate_n_bins(self, n_features):
        """Returns n_bins_, the number of bins per feature."""
        orig_bins = self.n_bins
        if isinstance(orig_bins, Integral):
            # Scalar: broadcast the same bin count to every feature.
            return np.full(n_features, orig_bins, dtype=int)

        n_bins = check_array(orig_bins, dtype=int, copy=True, ensure_2d=False)

        if n_bins.ndim > 1 or n_bins.shape[0] != n_features:
            raise ValueError("n_bins must be a scalar or array of shape (n_features,).")

        # `n_bins != orig_bins` catches non-integral values truncated by the
        # int cast above.
        bad_nbins_value = (n_bins < 2) | (n_bins != orig_bins)

        violating_indices = np.where(bad_nbins_value)[0]
        if violating_indices.shape[0] > 0:
            indices = ", ".join(str(i) for i in violating_indices)
            raise ValueError(
                "{} received an invalid number "
                "of bins at indices {}. Number of bins "
                "must be at least 2, and must be an int.".format(
                    KBinsDiscretizer.__name__, indices
                )
            )
        return n_bins

    def transform(self, X):
        """
        Discretize the data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to be discretized.

        Returns
        -------
        Xt : {ndarray, sparse matrix}, dtype={np.float32, np.float64}
            Data in the binned space. Will be a sparse matrix if
            `self.encode='onehot'` and ndarray otherwise.
        """
        check_is_fitted(self)

        # check input and attribute dtypes
        dtype = (np.float64, np.float32) if self.dtype is None else self.dtype
        Xt = validate_data(self, X, copy=True, dtype=dtype, reset=False)

        bin_edges = self.bin_edges_
        for jj in range(Xt.shape[1]):
            # Map values to bin ids against the interior edges only, so
            # out-of-range values fall into the first/last bin.
            Xt[:, jj] = np.searchsorted(bin_edges[jj][1:-1], Xt[:, jj], side="right")

        if self.encode == "ordinal":
            return Xt

        dtype_init = None
        if "onehot" in self.encode:
            dtype_init = self._encoder.dtype
            self._encoder.dtype = Xt.dtype
        try:
            Xt_enc = self._encoder.transform(Xt)
        finally:
            # revert the initial dtype to avoid modifying self.
            self._encoder.dtype = dtype_init
        return Xt_enc

    def inverse_transform(self, X):
        """
        Transform discretized data back to original feature space.

        Note that this function does not regenerate the original data
        due to discretization rounding.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Transformed data in the binned space.

        Returns
        -------
        X_original : ndarray, dtype={np.float32, np.float64}
            Data in the original feature space.
        """
        check_is_fitted(self)

        if "onehot" in self.encode:
            X = self._encoder.inverse_transform(X)

        Xinv = check_array(X, copy=True, dtype=(np.float64, np.float32))
        n_features = self.n_bins_.shape[0]
        if Xinv.shape[1] != n_features:
            raise ValueError(
                "Incorrect number of features. Expecting {}, received {}.".format(
                    n_features, Xinv.shape[1]
                )
            )

        for jj in range(n_features):
            bin_edges = self.bin_edges_[jj]
            # Each bin id is mapped back to the midpoint of its bin.
            bin_centers = (bin_edges[1:] + bin_edges[:-1]) * 0.5
            Xinv[:, jj] = bin_centers[(Xinv[:, jj]).astype(np.int64)]

        return Xinv

    def get_feature_names_out(self, input_features=None):
        """Get output feature names.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input features.

            - If `input_features` is `None`, then `feature_names_in_` is
              used as feature names in. If `feature_names_in_` is not defined,
              then the following input feature names are generated:
              `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            - If `input_features` is an array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        input_features = _check_feature_names_in(self, input_features)
        if hasattr(self, "_encoder"):
            # One-hot encodings: delegate to the fitted encoder, which knows
            # the per-feature bin categories.
            return self._encoder.get_feature_names_out(input_features)

        # ordinal encoding
        return input_features

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,449 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from functools import partial
import numpy as np
from ..base import BaseEstimator, TransformerMixin, _fit_context
from ..utils._param_validation import StrOptions
from ..utils._repr_html.estimator import _VisualBlock
from ..utils._set_output import (
_get_adapter_from_container,
_get_output_config,
)
from ..utils.metaestimators import available_if
from ..utils.validation import (
_allclose_dense_sparse,
_check_feature_names,
_check_feature_names_in,
_check_n_features,
_get_feature_names,
_is_pandas_df,
_is_polars_df,
check_array,
validate_data,
)
def _identity(X):
"""The identity function."""
return X
class FunctionTransformer(TransformerMixin, BaseEstimator):
"""Constructs a transformer from an arbitrary callable.
A FunctionTransformer forwards its X (and optionally y) arguments to a
user-defined function or function object and returns the result of this
function. This is useful for stateless transformations such as taking the
log of frequencies, doing custom scaling, etc.
Note: If a lambda is used as the function, then the resulting
transformer will not be pickleable.
.. versionadded:: 0.17
Read more in the :ref:`User Guide <function_transformer>`.
Parameters
----------
func : callable, default=None
The callable to use for the transformation. This will be passed
the same arguments as transform, with args and kwargs forwarded.
If func is None, then func will be the identity function.
inverse_func : callable, default=None
The callable to use for the inverse transformation. This will be
passed the same arguments as inverse transform, with args and
kwargs forwarded. If inverse_func is None, then inverse_func
will be the identity function.
validate : bool, default=False
Indicate that the input X array should be checked before calling
``func``. The possibilities are:
- If False, there is no input validation.
- If True, then X will be converted to a 2-dimensional NumPy array or
sparse matrix. If the conversion is not possible an exception is
raised.
.. versionchanged:: 0.22
The default of ``validate`` changed from True to False.
accept_sparse : bool, default=False
Indicate that func accepts a sparse matrix as input. If validate is
False, this has no effect. Otherwise, if accept_sparse is false,
sparse matrix inputs will cause an exception to be raised.
check_inverse : bool, default=True
Whether to check that or ``func`` followed by ``inverse_func`` leads to
the original inputs. It can be used for a sanity check, raising a
warning when the condition is not fulfilled.
.. versionadded:: 0.20
feature_names_out : callable, 'one-to-one' or None, default=None
Determines the list of feature names that will be returned by the
`get_feature_names_out` method. If it is 'one-to-one', then the output
feature names will be equal to the input feature names. If it is a
callable, then it must take two positional arguments: this
`FunctionTransformer` (`self`) and an array-like of input feature names
(`input_features`). It must return an array-like of output feature
names. The `get_feature_names_out` method is only defined if
`feature_names_out` is not None.
See ``get_feature_names_out`` for more details.
.. versionadded:: 1.1
kw_args : dict, default=None
Dictionary of additional keyword arguments to pass to func.
.. versionadded:: 0.18
inv_kw_args : dict, default=None
Dictionary of additional keyword arguments to pass to inverse_func.
.. versionadded:: 0.18
Attributes
----------
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X` has feature
names that are all strings.
.. versionadded:: 1.0
See Also
--------
MaxAbsScaler : Scale each feature by its maximum absolute value.
StandardScaler : Standardize features by removing the mean and
scaling to unit variance.
LabelBinarizer : Binarize labels in a one-vs-all fashion.
MultiLabelBinarizer : Transform between iterable of iterables
and a multilabel format.
Notes
-----
If `func` returns an output with a `columns` attribute, then the columns is enforced
to be consistent with the output of `get_feature_names_out`.
Examples
--------
>>> import numpy as np
>>> from sklearn.preprocessing import FunctionTransformer
>>> transformer = FunctionTransformer(np.log1p)
>>> X = np.array([[0, 1], [2, 3]])
>>> transformer.transform(X)
array([[0. , 0.6931],
[1.0986, 1.3862]])
"""
_parameter_constraints: dict = {
"func": [callable, None],
"inverse_func": [callable, None],
"validate": ["boolean"],
"accept_sparse": ["boolean"],
"check_inverse": ["boolean"],
"feature_names_out": [callable, StrOptions({"one-to-one"}), None],
"kw_args": [dict, None],
"inv_kw_args": [dict, None],
}
    def __init__(
        self,
        func=None,
        inverse_func=None,
        *,
        validate=False,
        accept_sparse=False,
        check_inverse=True,
        feature_names_out=None,
        kw_args=None,
        inv_kw_args=None,
    ):
        # Store parameters unmodified (scikit-learn convention: any
        # validation happens in `fit`). See the class docstring for the
        # meaning of each parameter.
        self.func = func
        self.inverse_func = inverse_func
        self.validate = validate
        self.accept_sparse = accept_sparse
        self.check_inverse = check_inverse
        self.feature_names_out = feature_names_out
        self.kw_args = kw_args
        self.inv_kw_args = inv_kw_args
    def _check_input(self, X, *, reset):
        """Validate ``X`` when ``self.validate`` is True.

        When ``validate`` is False, ``X`` is returned untouched; if ``reset``
        is True the input metadata (``n_features_in_`` /
        ``feature_names_in_``) is still recorded.
        """
        if self.validate:
            return validate_data(self, X, accept_sparse=self.accept_sparse, reset=reset)
        elif reset:
            # Set feature_names_in_ and n_features_in_ even if validate=False
            # We run this only when reset==True to store the attributes but not
            # validate them, because validate=False
            _check_n_features(self, X, reset=reset)
            _check_feature_names(self, X, reset=reset)
        return X
    def _check_inverse_transform(self, X):
        """Check that func and inverse_func are the inverse.

        A subsample of roughly 100 rows is round-tripped through
        ``transform`` then ``inverse_transform``; a ``UserWarning`` is
        emitted if the result does not match the input.
        """
        # Stride-based subsample: at most ~100 rows are round-tripped.
        idx_selected = slice(None, None, max(1, X.shape[0] // 100))
        X_round_trip = self.inverse_transform(self.transform(X[idx_selected]))

        if hasattr(X, "dtype"):
            dtypes = [X.dtype]
        elif hasattr(X, "dtypes"):
            # Dataframes can have multiple dtypes
            dtypes = X.dtypes
        # NOTE(review): if X has neither `dtype` nor `dtypes`, `dtypes` is
        # unbound below (NameError) — presumably unreachable for the inputs
        # this is called with; confirm.

        # Not all dtypes are numpy dtypes, they can be pandas dtypes as well
        if not all(
            isinstance(d, np.dtype) and np.issubdtype(d, np.number) for d in dtypes
        ):
            raise ValueError(
                "'check_inverse' is only supported when all the elements in `X` is"
                " numerical."
            )

        if not _allclose_dense_sparse(X[idx_selected], X_round_trip):
            warnings.warn(
                (
                    "The provided functions are not strictly"
                    " inverse of each other. If you are sure you"
                    " want to proceed regardless, set"
                    " 'check_inverse=False'."
                ),
                UserWarning,
            )
    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit transformer by checking X.

        If ``validate`` is ``True``, ``X`` will be checked.

        Parameters
        ----------
        X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
                if `validate=True` else any object that `func` can handle
            Input array.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            FunctionTransformer class instance.
        """
        X = self._check_input(X, reset=True)
        # The round-trip check only makes sense when both directions are
        # user-provided; a None side defaults to identity and is skipped.
        if self.check_inverse and not (self.func is None or self.inverse_func is None):
            self._check_inverse_transform(X)
        return self
    def transform(self, X):
        """Transform X using the forward function.
        Parameters
        ----------
        X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
                if `validate=True` else any object that `func` can handle
            Input array.
        Returns
        -------
        X_out : array-like, shape (n_samples, n_features)
            Transformed input.
        """
        X = self._check_input(X, reset=False)
        out = self._transform(X, func=self.func, kw_args=self.kw_args)
        # Dense output container requested via set_output:
        # "default", "pandas" or "polars".
        output_config = _get_output_config("transform", self)["dense"]
        if hasattr(out, "columns") and self.feature_names_out is not None:
            # check the consistency between the column provided by `transform` and
            # the column names provided by `get_feature_names_out`.
            feature_names_out = self.get_feature_names_out()
            if list(out.columns) != list(feature_names_out):
                # we can override the column names of the output if it is inconsistent
                # with the column names provided by `get_feature_names_out` in the
                # following cases:
                # * `func` preserved the column names between the input and the output
                # * the input column names are all numbers
                # * the output is requested to be a DataFrame (pandas or polars)
                feature_names_in = getattr(
                    X, "feature_names_in_", _get_feature_names(X)
                )
                same_feature_names_in_out = feature_names_in is not None and list(
                    feature_names_in
                ) == list(out.columns)
                not_all_str_columns = not all(
                    isinstance(col, str) for col in out.columns
                )
                if same_feature_names_in_out or not_all_str_columns:
                    # Rebuild the container with the authoritative column
                    # names from `get_feature_names_out`.
                    adapter = _get_adapter_from_container(out)
                    out = adapter.create_container(
                        X_output=out,
                        X_original=out,
                        columns=feature_names_out,
                        inplace=False,
                    )
                else:
                    raise ValueError(
                        "The output generated by `func` have different column names "
                        "than the ones provided by `get_feature_names_out`. "
                        f"Got output with columns names: {list(out.columns)} and "
                        "`get_feature_names_out` returned: "
                        f"{list(self.get_feature_names_out())}. "
                        "The column names can be overridden by setting "
                        "`set_output(transform='pandas')` or "
                        "`set_output(transform='polars')` such that the column names "
                        "are set to the names provided by `get_feature_names_out`."
                    )
        if self.feature_names_out is None:
            # Warn (do not fail) when the configured output container cannot
            # be honored because `func` returned a different type and no
            # `feature_names_out` is available to build column names.
            warn_msg = (
                "When `set_output` is configured to be '{0}', `func` should return "
                "a {0} DataFrame to follow the `set_output` API or `feature_names_out`"
                " should be defined."
            )
            if output_config == "pandas" and not _is_pandas_df(out):
                warnings.warn(warn_msg.format("pandas"))
            elif output_config == "polars" and not _is_polars_df(out):
                warnings.warn(warn_msg.format("polars"))
        return out
def inverse_transform(self, X):
"""Transform X using the inverse function.
Parameters
----------
X : {array-like, sparse-matrix} of shape (n_samples, n_features) \
if `validate=True` else any object that `inverse_func` can handle
Input array.
Returns
-------
X_original : array-like, shape (n_samples, n_features)
Transformed input.
"""
if self.validate:
X = check_array(X, accept_sparse=self.accept_sparse)
return self._transform(X, func=self.inverse_func, kw_args=self.inv_kw_args)
    @available_if(lambda self: self.feature_names_out is not None)
    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.
        This method is only defined if `feature_names_out` is not None.
        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Input feature names.
            - If `input_features` is None, then `feature_names_in_` is
              used as the input feature names. If `feature_names_in_` is not
              defined, then names are generated:
              `[x0, x1, ..., x(n_features_in_ - 1)]`.
            - If `input_features` is array-like, then `input_features` must
              match `feature_names_in_` if `feature_names_in_` is defined.
        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
            - If `feature_names_out` is 'one-to-one', the input feature names
              are returned (see `input_features` above). This requires
              `feature_names_in_` and/or `n_features_in_` to be defined, which
              is done automatically if `validate=True`. Alternatively, you can
              set them in `func`.
            - If `feature_names_out` is a callable, then it is called with two
              arguments, `self` and `input_features`, and its return value is
              returned by this method.
        """
        # Only resolve/validate input names when we know the number of input
        # features or the caller supplied names explicitly.
        if hasattr(self, "n_features_in_") or input_features is not None:
            input_features = _check_feature_names_in(self, input_features)
        if self.feature_names_out == "one-to-one":
            names_out = input_features
        elif callable(self.feature_names_out):
            names_out = self.feature_names_out(self, input_features)
        else:
            raise ValueError(
                f"feature_names_out={self.feature_names_out!r} is invalid. "
                'It must either be "one-to-one" or a callable with two '
                "arguments: the function transformer and an array-like of "
                "input feature names. The callable must return an array-like "
                "of output feature names."
            )
        return np.asarray(names_out, dtype=object)
def _transform(self, X, func=None, kw_args=None):
if func is None:
func = _identity
return func(X, **(kw_args if kw_args else {}))
def __sklearn_is_fitted__(self):
"""Return True since FunctionTransfomer is stateless."""
return True
def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
tags.no_validation = not self.validate
tags.requires_fit = False
tags.input_tags.sparse = not self.validate or self.accept_sparse
return tags
def set_output(self, *, transform=None):
"""Set output container.
See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py`
for an example on how to use the API.
Parameters
----------
transform : {"default", "pandas", "polars"}, default=None
Configure output of `transform` and `fit_transform`.
- `"default"`: Default output format of a transformer
- `"pandas"`: DataFrame output
- `"polars"`: Polars output
- `None`: Transform configuration is unchanged
.. versionadded:: 1.4
`"polars"` option was added.
Returns
-------
self : estimator instance
Estimator instance.
"""
if not hasattr(self, "_sklearn_output_config"):
self._sklearn_output_config = {}
self._sklearn_output_config["transform"] = transform
return self
def _get_function_name(self):
"""Get the name display of the `func` used in HTML representation."""
if hasattr(self.func, "__name__"):
return self.func.__name__
if isinstance(self.func, partial):
return self.func.func.__name__
return f"{self.func.__class__.__name__}(...)"
def _sk_visual_block_(self):
return _VisualBlock(
"single",
self,
names=self._get_function_name(),
name_details=str(self),
name_caption="FunctionTransformer",
doc_link_label="FunctionTransformer",
)

View File

@@ -0,0 +1,963 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import array
import itertools
import warnings
from collections import defaultdict
from numbers import Integral
import numpy as np
import scipy.sparse as sp
from ..base import BaseEstimator, TransformerMixin, _fit_context
from ..utils import column_or_1d
from ..utils._array_api import device, get_namespace, xpx
from ..utils._encode import _encode, _unique
from ..utils._param_validation import Interval, validate_params
from ..utils.multiclass import type_of_target, unique_labels
from ..utils.sparsefuncs import min_max_axis
from ..utils.validation import _num_samples, check_array, check_is_fitted
__all__ = [
"LabelBinarizer",
"LabelEncoder",
"MultiLabelBinarizer",
"label_binarize",
]
class LabelEncoder(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None):
    """Encode target labels with value between 0 and n_classes-1.
    This transformer should be used to encode target values, *i.e.* `y`, and
    not the input `X`.
    Read more in the :ref:`User Guide <preprocessing_targets>`.
    .. versionadded:: 0.12
    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Holds the label for each class.
    See Also
    --------
    OrdinalEncoder : Encode categorical features using an ordinal encoding
        scheme.
    OneHotEncoder : Encode categorical features as a one-hot numeric array.
    Examples
    --------
    `LabelEncoder` can be used to normalize labels.
    >>> from sklearn.preprocessing import LabelEncoder
    >>> le = LabelEncoder()
    >>> le.fit([1, 2, 2, 6])
    LabelEncoder()
    >>> le.classes_
    array([1, 2, 6])
    >>> le.transform([1, 1, 2, 6])
    array([0, 0, 1, 2]...)
    >>> le.inverse_transform([0, 0, 1, 2])
    array([1, 1, 2, 6])
    It can also be used to transform non-numerical labels (as long as they are
    hashable and comparable) to numerical labels.
    >>> le = LabelEncoder()
    >>> le.fit(["paris", "paris", "tokyo", "amsterdam"])
    LabelEncoder()
    >>> list(le.classes_)
    [np.str_('amsterdam'), np.str_('paris'), np.str_('tokyo')]
    >>> le.transform(["tokyo", "tokyo", "paris"])
    array([2, 2, 1]...)
    >>> list(le.inverse_transform([2, 2, 1]))
    [np.str_('tokyo'), np.str_('tokyo'), np.str_('paris')]
    """
    def fit(self, y):
        """Fit label encoder.
        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        Returns
        -------
        self : returns an instance of self.
            Fitted label encoder.
        """
        y = column_or_1d(y, warn=True)
        self.classes_ = _unique(y)
        return self
    def fit_transform(self, y):
        """Fit label encoder and return encoded labels.
        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        Returns
        -------
        y : array-like of shape (n_samples,)
            Encoded labels.
        """
        y = column_or_1d(y, warn=True)
        # Single pass: `return_inverse=True` yields the encoded labels
        # alongside the unique classes, avoiding a second encoding step.
        self.classes_, y = _unique(y, return_inverse=True)
        return y
    def transform(self, y):
        """Transform labels to normalized encoding.
        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        Returns
        -------
        y : array-like of shape (n_samples,)
            Labels as normalized encodings.
        """
        check_is_fitted(self)
        xp, _ = get_namespace(y)
        y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)
        # transform of empty array is empty array
        if _num_samples(y) == 0:
            return xp.asarray([])
        return _encode(y, uniques=self.classes_)
    def inverse_transform(self, y):
        """Transform labels back to original encoding.
        Parameters
        ----------
        y : array-like of shape (n_samples,)
            Target values.
        Returns
        -------
        y_original : ndarray of shape (n_samples,)
            Original encoding.
        """
        check_is_fitted(self)
        xp, _ = get_namespace(y)
        y = column_or_1d(y, warn=True)
        # inverse transform of empty array is empty array
        if _num_samples(y) == 0:
            return xp.asarray([])
        # Any encoded value outside [0, n_classes) was never produced by
        # transform, so reject it explicitly.
        diff = xpx.setdiff1d(
            y,
            xp.arange(self.classes_.shape[0], device=device(y)),
            xp=xp,
        )
        if diff.shape[0]:
            raise ValueError("y contains previously unseen labels: %s" % str(diff))
        y = xp.asarray(y)
        return xp.take(self.classes_, y, axis=0)
    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        # Targets are 1d label vectors; 2d X-style validation does not apply.
        tags.array_api_support = True
        tags.input_tags.two_d_array = False
        tags.target_tags.one_d_labels = True
        return tags
class LabelBinarizer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None):
    """Binarize labels in a one-vs-all fashion.
    Several regression and binary classification algorithms are
    available in scikit-learn. A simple way to extend these algorithms
    to the multi-class classification case is to use the so-called
    one-vs-all scheme.
    At learning time, this simply consists in learning one regressor
    or binary classifier per class. In doing so, one needs to convert
    multi-class labels to binary labels (belong or does not belong
    to the class). `LabelBinarizer` makes this process easy with the
    transform method.
    At prediction time, one assigns the class for which the corresponding
    model gave the greatest confidence. `LabelBinarizer` makes this easy
    with the :meth:`inverse_transform` method.
    Read more in the :ref:`User Guide <preprocessing_targets>`.
    Parameters
    ----------
    neg_label : int, default=0
        Value with which negative labels must be encoded.
    pos_label : int, default=1
        Value with which positive labels must be encoded.
    sparse_output : bool, default=False
        True if the returned array from transform is desired to be in sparse
        CSR format.
    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        Holds the label for each class.
    y_type_ : str
        Represents the type of the target data as evaluated by
        :func:`~sklearn.utils.multiclass.type_of_target`. Possible type are
        'continuous', 'continuous-multioutput', 'binary', 'multiclass',
        'multiclass-multioutput', 'multilabel-indicator', and 'unknown'.
    sparse_input_ : bool
        `True` if the input data to transform is given as a sparse matrix,
        `False` otherwise.
    See Also
    --------
    label_binarize : Function to perform the transform operation of
        LabelBinarizer with fixed classes.
    OneHotEncoder : Encode categorical features using a one-hot aka one-of-K
        scheme.
    Examples
    --------
    >>> from sklearn.preprocessing import LabelBinarizer
    >>> lb = LabelBinarizer()
    >>> lb.fit([1, 2, 6, 4, 2])
    LabelBinarizer()
    >>> lb.classes_
    array([1, 2, 4, 6])
    >>> lb.transform([1, 6])
    array([[1, 0, 0, 0],
           [0, 0, 0, 1]])
    Binary targets transform to a column vector
    >>> lb = LabelBinarizer()
    >>> lb.fit_transform(['yes', 'no', 'no', 'yes'])
    array([[1],
           [0],
           [0],
           [1]])
    Passing a 2D matrix for multilabel classification
    >>> import numpy as np
    >>> lb.fit(np.array([[0, 1, 1], [1, 0, 0]]))
    LabelBinarizer()
    >>> lb.classes_
    array([0, 1, 2])
    >>> lb.transform([0, 1, 2, 1])
    array([[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1],
           [0, 1, 0]])
    """
    _parameter_constraints: dict = {
        "neg_label": [Integral],
        "pos_label": [Integral],
        "sparse_output": ["boolean"],
    }
    def __init__(self, *, neg_label=0, pos_label=1, sparse_output=False):
        self.neg_label = neg_label
        self.pos_label = pos_label
        self.sparse_output = sparse_output
    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, y):
        """Fit label binarizer.
        Parameters
        ----------
        y : ndarray of shape (n_samples,) or (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification.
        Returns
        -------
        self : object
            Returns the instance itself.
        """
        if self.neg_label >= self.pos_label:
            raise ValueError(
                f"neg_label={self.neg_label} must be strictly less than "
                f"pos_label={self.pos_label}."
            )
        # Sparse CSR output cannot represent a non-zero neg_label (the zeros
        # are implicit), hence the restriction below.
        if self.sparse_output and (self.pos_label == 0 or self.neg_label != 0):
            raise ValueError(
                "Sparse binarization is only supported with non "
                "zero pos_label and zero neg_label, got "
                f"pos_label={self.pos_label} and neg_label={self.neg_label}"
            )
        self.y_type_ = type_of_target(y, input_name="y")
        if "multioutput" in self.y_type_:
            raise ValueError(
                "Multioutput target data is not supported with label binarization"
            )
        if _num_samples(y) == 0:
            raise ValueError("y has 0 samples: %r" % y)
        # Remember input sparseness so inverse_transform can mirror it.
        self.sparse_input_ = sp.issparse(y)
        self.classes_ = unique_labels(y)
        return self
    def fit_transform(self, y):
        """Fit label binarizer/transform multi-class labels to binary labels.
        The output of transform is sometimes referred to as
        the 1-of-K coding scheme.
        Parameters
        ----------
        y : {ndarray, sparse matrix} of shape (n_samples,) or \
                (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification. Sparse matrix can be
            CSR, CSC, COO, DOK, or LIL.
        Returns
        -------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Shape will be (n_samples, 1) for binary problems. Sparse matrix
            will be of CSR format.
        """
        return self.fit(y).transform(y)
    def transform(self, y):
        """Transform multi-class labels to binary labels.
        The output of transform is sometimes referred to by some authors as
        the 1-of-K coding scheme.
        Parameters
        ----------
        y : {array, sparse matrix} of shape (n_samples,) or \
                (n_samples, n_classes)
            Target values. The 2-d matrix should only contain 0 and 1,
            represents multilabel classification. Sparse matrix can be
            CSR, CSC, COO, DOK, or LIL.
        Returns
        -------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Shape will be (n_samples, 1) for binary problems. Sparse matrix
            will be of CSR format.
        """
        check_is_fitted(self)
        y_is_multilabel = type_of_target(y).startswith("multilabel")
        if y_is_multilabel and not self.y_type_.startswith("multilabel"):
            raise ValueError("The object was not fitted with multilabel input.")
        # Delegate the actual encoding to the functional form with the
        # classes learned during fit.
        return label_binarize(
            y,
            classes=self.classes_,
            pos_label=self.pos_label,
            neg_label=self.neg_label,
            sparse_output=self.sparse_output,
        )
    def inverse_transform(self, Y, threshold=None):
        """Transform binary labels back to multi-class labels.
        Parameters
        ----------
        Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
            Target values. All sparse matrices are converted to CSR before
            inverse transformation.
        threshold : float, default=None
            Threshold used in the binary and multi-label cases.
            Use 0 when ``Y`` contains the output of :term:`decision_function`
            (classifier).
            Use 0.5 when ``Y`` contains the output of :term:`predict_proba`.
            If None, the threshold is assumed to be half way between
            neg_label and pos_label.
        Returns
        -------
        y_original : {ndarray, sparse matrix} of shape (n_samples,)
            Target values. Sparse matrix will be of CSR format.
        Notes
        -----
        In the case when the binary labels are fractional
        (probabilistic), :meth:`inverse_transform` chooses the class with the
        greatest value. Typically, this allows to use the output of a
        linear model's :term:`decision_function` method directly as the input
        of :meth:`inverse_transform`.
        """
        check_is_fitted(self)
        if threshold is None:
            # Default: midpoint between the two encoded label values.
            threshold = (self.pos_label + self.neg_label) / 2.0
        if self.y_type_ == "multiclass":
            y_inv = _inverse_binarize_multiclass(Y, self.classes_)
        else:
            y_inv = _inverse_binarize_thresholding(
                Y, self.y_type_, self.classes_, threshold
            )
        # Mirror the sparseness of the data seen at fit time.
        if self.sparse_input_:
            y_inv = sp.csr_matrix(y_inv)
        elif sp.issparse(y_inv):
            y_inv = y_inv.toarray()
        return y_inv
    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.input_tags.two_d_array = False
        tags.target_tags.one_d_labels = True
        return tags
@validate_params(
    {
        "y": ["array-like", "sparse matrix"],
        "classes": ["array-like"],
        "neg_label": [Interval(Integral, None, None, closed="neither")],
        "pos_label": [Interval(Integral, None, None, closed="neither")],
        "sparse_output": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False):
    """Binarize labels in a one-vs-all fashion.
    Several regression and binary classification algorithms are
    available in scikit-learn. A simple way to extend these algorithms
    to the multi-class classification case is to use the so-called
    one-vs-all scheme.
    This function makes it possible to compute this transformation for a
    fixed set of class labels known ahead of time.
    Parameters
    ----------
    y : array-like or sparse matrix
        Sequence of integer labels or multilabel data to encode.
    classes : array-like of shape (n_classes,)
        Uniquely holds the label for each class.
    neg_label : int, default=0
        Value with which negative labels must be encoded.
    pos_label : int, default=1
        Value with which positive labels must be encoded.
    sparse_output : bool, default=False,
        Set to true if output binary array is desired in CSR sparse format.
    Returns
    -------
    Y : {ndarray, sparse matrix} of shape (n_samples, n_classes)
        Shape will be (n_samples, 1) for binary problems. Sparse matrix will
        be of CSR format.
    See Also
    --------
    LabelBinarizer : Class used to wrap the functionality of label_binarize and
        allow for fitting to classes independently of the transform operation.
    Examples
    --------
    >>> from sklearn.preprocessing import label_binarize
    >>> label_binarize([1, 6], classes=[1, 2, 4, 6])
    array([[1, 0, 0, 0],
           [0, 0, 0, 1]])
    The class ordering is preserved:
    >>> label_binarize([1, 6], classes=[1, 6, 4, 2])
    array([[1, 0, 0, 0],
           [0, 1, 0, 0]])
    Binary targets transform to a column vector
    >>> label_binarize(['yes', 'no', 'no', 'yes'], classes=['no', 'yes'])
    array([[1],
           [0],
           [0],
           [1]])
    """
    if not isinstance(y, list):
        # XXX Workaround that will be removed when list of list format is
        # dropped
        y = check_array(
            y, input_name="y", accept_sparse="csr", ensure_2d=False, dtype=None
        )
    else:
        if _num_samples(y) == 0:
            raise ValueError("y has 0 samples: %r" % y)
    if neg_label >= pos_label:
        raise ValueError(
            "neg_label={0} must be strictly less than pos_label={1}.".format(
                neg_label, pos_label
            )
        )
    # CSR output stores only pos_label entries; implicit zeros play the role
    # of neg_label, hence the restriction below.
    if sparse_output and (pos_label == 0 or neg_label != 0):
        raise ValueError(
            "Sparse binarization is only supported with non "
            "zero pos_label and zero neg_label, got "
            "pos_label={0} and neg_label={1}"
            "".format(pos_label, neg_label)
        )
    # To account for pos_label == 0 in the dense case
    pos_switch = pos_label == 0
    if pos_switch:
        pos_label = -neg_label
    y_type = type_of_target(y)
    if "multioutput" in y_type:
        raise ValueError(
            "Multioutput target data is not supported with label binarization"
        )
    if y_type == "unknown":
        raise ValueError("The type of target data is not known")
    n_samples = y.shape[0] if sp.issparse(y) else len(y)
    n_classes = len(classes)
    classes = np.asarray(classes)
    if y_type == "binary":
        if n_classes == 1:
            # Degenerate single-class case: every sample is "negative".
            if sparse_output:
                return sp.csr_matrix((n_samples, 1), dtype=int)
            else:
                Y = np.zeros((len(y), 1), dtype=int)
                Y += neg_label
                return Y
        elif len(classes) >= 3:
            y_type = "multiclass"
    sorted_class = np.sort(classes)
    if y_type == "multilabel-indicator":
        y_n_classes = y.shape[1] if hasattr(y, "shape") else len(y[0])
        if classes.size != y_n_classes:
            raise ValueError(
                "classes {0} mismatch with the labels {1} found in the data".format(
                    classes, unique_labels(y)
                )
            )
    if y_type in ("binary", "multiclass"):
        y = column_or_1d(y)
        # pick out the known labels from y
        y_in_classes = np.isin(y, classes)
        y_seen = y[y_in_classes]
        indices = np.searchsorted(sorted_class, y_seen)
        # One row per sample; samples with an unknown label contribute no
        # stored entry (their cumsum increment is 0).
        indptr = np.hstack((0, np.cumsum(y_in_classes)))
        data = np.empty_like(indices)
        data.fill(pos_label)
        Y = sp.csr_matrix((data, indices, indptr), shape=(n_samples, n_classes))
    elif y_type == "multilabel-indicator":
        Y = sp.csr_matrix(y)
        if pos_label != 1:
            data = np.empty_like(Y.data)
            data.fill(pos_label)
            Y.data = data
    else:
        raise ValueError(
            "%s target data is not supported with label binarization" % y_type
        )
    if not sparse_output:
        Y = Y.toarray()
        Y = Y.astype(int, copy=False)
        if neg_label != 0:
            Y[Y == 0] = neg_label
        if pos_switch:
            Y[Y == pos_label] = 0
    else:
        Y.data = Y.data.astype(int, copy=False)
    # preserve label ordering
    if np.any(classes != sorted_class):
        indices = np.searchsorted(sorted_class, classes)
        Y = Y[:, indices]
    if y_type == "binary":
        # Binary targets are reported as a single column: the last column,
        # i.e. the positive class in sorted order.
        if sparse_output:
            Y = Y[:, [-1]]
        else:
            Y = Y[:, -1].reshape((-1, 1))
    return Y
def _inverse_binarize_multiclass(y, classes):
"""Inverse label binarization transformation for multiclass.
Multiclass uses the maximal score instead of a threshold.
"""
classes = np.asarray(classes)
if sp.issparse(y):
# Find the argmax for each row in y where y is a CSR matrix
y = y.tocsr()
n_samples, n_outputs = y.shape
outputs = np.arange(n_outputs)
row_max = min_max_axis(y, 1)[1]
row_nnz = np.diff(y.indptr)
y_data_repeated_max = np.repeat(row_max, row_nnz)
# picks out all indices obtaining the maximum per row
y_i_all_argmax = np.flatnonzero(y_data_repeated_max == y.data)
# For corner case where last row has a max of 0
if row_max[-1] == 0:
y_i_all_argmax = np.append(y_i_all_argmax, [len(y.data)])
# Gets the index of the first argmax in each row from y_i_all_argmax
index_first_argmax = np.searchsorted(y_i_all_argmax, y.indptr[:-1])
# first argmax of each row
y_ind_ext = np.append(y.indices, [0])
y_i_argmax = y_ind_ext[y_i_all_argmax[index_first_argmax]]
# Handle rows of all 0
y_i_argmax[np.where(row_nnz == 0)[0]] = 0
# Handles rows with max of 0 that contain negative numbers
samples = np.arange(n_samples)[(row_nnz > 0) & (row_max.ravel() == 0)]
for i in samples:
ind = y.indices[y.indptr[i] : y.indptr[i + 1]]
y_i_argmax[i] = classes[np.setdiff1d(outputs, ind)][0]
return classes[y_i_argmax]
else:
return classes.take(y.argmax(axis=1), mode="clip")
def _inverse_binarize_thresholding(y, output_type, classes, threshold):
"""Inverse label binarization transformation using thresholding."""
if output_type == "binary" and y.ndim == 2 and y.shape[1] > 2:
raise ValueError("output_type='binary', but y.shape = {0}".format(y.shape))
if output_type != "binary" and y.shape[1] != len(classes):
raise ValueError(
"The number of class is not equal to the number of dimension of y."
)
classes = np.asarray(classes)
# Perform thresholding
if sp.issparse(y):
if threshold > 0:
if y.format not in ("csr", "csc"):
y = y.tocsr()
y.data = np.array(y.data > threshold, dtype=int)
y.eliminate_zeros()
else:
y = np.array(y.toarray() > threshold, dtype=int)
else:
y = np.array(y > threshold, dtype=int)
# Inverse transform data
if output_type == "binary":
if sp.issparse(y):
y = y.toarray()
if y.ndim == 2 and y.shape[1] == 2:
return classes[y[:, 1]]
else:
if len(classes) == 1:
return np.repeat(classes[0], len(y))
else:
return classes[y.ravel()]
elif output_type == "multilabel-indicator":
return y
else:
raise ValueError("{0} format is not supported".format(output_type))
class MultiLabelBinarizer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None):
"""Transform between iterable of iterables and a multilabel format.
Although a list of sets or tuples is a very intuitive format for multilabel
data, it is unwieldy to process. This transformer converts between this
intuitive format and the supported multilabel format: a (samples x classes)
binary matrix indicating the presence of a class label.
Parameters
----------
classes : array-like of shape (n_classes,), default=None
Indicates an ordering for the class labels.
All entries should be unique (cannot contain duplicate classes).
sparse_output : bool, default=False
Set to True if output binary array is desired in CSR sparse format.
Attributes
----------
classes_ : ndarray of shape (n_classes,)
A copy of the `classes` parameter when provided.
Otherwise it corresponds to the sorted set of classes found
when fitting.
See Also
--------
OneHotEncoder : Encode categorical features using a one-hot aka one-of-K
scheme.
Examples
--------
>>> from sklearn.preprocessing import MultiLabelBinarizer
>>> mlb = MultiLabelBinarizer()
>>> mlb.fit_transform([(1, 2), (3,)])
array([[1, 1, 0],
[0, 0, 1]])
>>> mlb.classes_
array([1, 2, 3])
>>> mlb.fit_transform([{'sci-fi', 'thriller'}, {'comedy'}])
array([[0, 1, 1],
[1, 0, 0]])
>>> list(mlb.classes_)
['comedy', 'sci-fi', 'thriller']
A common mistake is to pass in a list, which leads to the following issue:
>>> mlb = MultiLabelBinarizer()
>>> mlb.fit(['sci-fi', 'thriller', 'comedy'])
MultiLabelBinarizer()
>>> mlb.classes_
array(['-', 'c', 'd', 'e', 'f', 'h', 'i', 'l', 'm', 'o', 'r', 's', 't',
'y'], dtype=object)
To correct this, the list of labels should be passed in as:
>>> mlb = MultiLabelBinarizer()
>>> mlb.fit([['sci-fi', 'thriller', 'comedy']])
MultiLabelBinarizer()
>>> mlb.classes_
array(['comedy', 'sci-fi', 'thriller'], dtype=object)
"""
_parameter_constraints: dict = {
"classes": ["array-like", None],
"sparse_output": ["boolean"],
}
def __init__(self, *, classes=None, sparse_output=False):
self.classes = classes
self.sparse_output = sparse_output
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, y):
"""Fit the label sets binarizer, storing :term:`classes_`.
Parameters
----------
y : iterable of iterables
A set of labels (any orderable and hashable object) for each
sample. If the `classes` parameter is set, `y` will not be
iterated.
Returns
-------
self : object
Fitted estimator.
"""
self._cached_dict = None
if self.classes is None:
classes = sorted(set(itertools.chain.from_iterable(y)))
elif len(set(self.classes)) < len(self.classes):
raise ValueError(
"The classes argument contains duplicate "
"classes. Remove these duplicates before passing "
"them to MultiLabelBinarizer."
)
else:
classes = self.classes
dtype = int if all(isinstance(c, int) for c in classes) else object
self.classes_ = np.empty(len(classes), dtype=dtype)
self.classes_[:] = classes
return self
@_fit_context(prefer_skip_nested_validation=True)
def fit_transform(self, y):
"""Fit the label sets binarizer and transform the given label sets.
Parameters
----------
y : iterable of iterables
A set of labels (any orderable and hashable object) for each
sample. If the `classes` parameter is set, `y` will not be
iterated.
Returns
-------
y_indicator : {ndarray, sparse matrix} of shape (n_samples, n_classes)
A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]`
is in `y[i]`, and 0 otherwise. Sparse matrix will be of CSR
format.
"""
if self.classes is not None:
return self.fit(y).transform(y)
self._cached_dict = None
# Automatically increment on new class
class_mapping = defaultdict(int)
class_mapping.default_factory = class_mapping.__len__
yt = self._transform(y, class_mapping)
# sort classes and reorder columns
tmp = sorted(class_mapping, key=class_mapping.get)
# (make safe for tuples)
dtype = int if all(isinstance(c, int) for c in tmp) else object
class_mapping = np.empty(len(tmp), dtype=dtype)
class_mapping[:] = tmp
self.classes_, inverse = np.unique(class_mapping, return_inverse=True)
# ensure yt.indices keeps its current dtype
yt.indices = np.asarray(inverse[yt.indices], dtype=yt.indices.dtype)
if not self.sparse_output:
yt = yt.toarray()
return yt
def transform(self, y):
"""Transform the given label sets.
Parameters
----------
y : iterable of iterables
A set of labels (any orderable and hashable object) for each
sample. If the `classes` parameter is set, `y` will not be
iterated.
Returns
-------
y_indicator : array or CSR matrix, shape (n_samples, n_classes)
A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in
`y[i]`, and 0 otherwise.
"""
check_is_fitted(self)
class_to_index = self._build_cache()
yt = self._transform(y, class_to_index)
if not self.sparse_output:
yt = yt.toarray()
return yt
def _build_cache(self):
if self._cached_dict is None:
self._cached_dict = dict(zip(self.classes_, range(len(self.classes_))))
return self._cached_dict
def _transform(self, y, class_mapping):
"""Transforms the label sets with a given mapping.
Parameters
----------
y : iterable of iterables
A set of labels (any orderable and hashable object) for each
sample. If the `classes` parameter is set, `y` will not be
iterated.
class_mapping : Mapping
Maps from label to column index in label indicator matrix.
Returns
-------
y_indicator : sparse matrix of shape (n_samples, n_classes)
Label indicator matrix. Will be of CSR format.
"""
indices = array.array("i")
indptr = array.array("i", [0])
unknown = set()
for labels in y:
index = set()
for label in labels:
try:
index.add(class_mapping[label])
except KeyError:
unknown.add(label)
indices.extend(index)
indptr.append(len(indices))
if unknown:
warnings.warn(
"unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))
)
data = np.ones(len(indices), dtype=int)
return sp.csr_matrix(
(data, indices, indptr), shape=(len(indptr) - 1, len(class_mapping))
)
def inverse_transform(self, yt):
"""Transform the given indicator matrix into label sets.
Parameters
----------
yt : {ndarray, sparse matrix} of shape (n_samples, n_classes)
A matrix containing only 1s ands 0s.
Returns
-------
y_original : list of tuples
The set of labels for each sample such that `y[i]` consists of
`classes_[j]` for each `yt[i, j] == 1`.
"""
check_is_fitted(self)
if yt.shape[1] != len(self.classes_):
raise ValueError(
"Expected indicator for {0} classes, but got {1}".format(
len(self.classes_), yt.shape[1]
)
)
if sp.issparse(yt):
yt = yt.tocsr()
if len(yt.data) != 0 and len(np.setdiff1d(yt.data, [0, 1])) > 0:
raise ValueError("Expected only 0s and 1s in label indicator.")
return [
tuple(self.classes_.take(yt.indices[start:end]))
for start, end in zip(yt.indptr[:-1], yt.indptr[1:])
]
else:
unexpected = np.setdiff1d(yt, [0, 1])
if len(unexpected) > 0:
raise ValueError(
"Expected only 0s and 1s in label indicator. Also got {0}".format(
unexpected
)
)
return [tuple(self.classes_.compress(indicators)) for indicators in yt]
    def __sklearn_tags__(self):
        """Return estimator tags adjusted for multilabel input and targets."""
        tags = super().__sklearn_tags__()
        # Input is an iterable of label sets, not a 2d feature array.
        tags.input_tags.two_d_array = False
        # Targets are 2d label-indicator structures.
        tags.target_tags.two_d_labels = True
        return tags

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,534 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral, Real
import numpy as np
from ..base import OneToOneFeatureMixin, _fit_context
from ..utils._param_validation import Interval, StrOptions
from ..utils.multiclass import type_of_target
from ..utils.validation import (
_check_feature_names_in,
_check_y,
check_consistent_length,
check_is_fitted,
)
from ._encoders import _BaseEncoder
from ._target_encoder_fast import _fit_encoding_fast, _fit_encoding_fast_auto_smooth
class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder):
    """Target Encoder for regression and classification targets.

    Each category is encoded based on a shrunk estimate of the average target
    values for observations belonging to the category. The encoding scheme mixes
    the global target mean with the target mean conditioned on the value of the
    category (see [MIC]_).

    When the target type is "multiclass", encodings are based
    on the conditional probability estimate for each class. The target is first
    binarized using the "one-vs-all" scheme via
    :class:`~sklearn.preprocessing.LabelBinarizer`, then the average target
    value for each class and each category is used for encoding, resulting in
    `n_features` * `n_classes` encoded output features.

    :class:`TargetEncoder` considers missing values, such as `np.nan` or `None`,
    as another category and encodes them like any other category. Categories
    that are not seen during :meth:`fit` are encoded with the target mean, i.e.
    `target_mean_`.

    For a demo on the importance of the `TargetEncoder` internal cross-fitting,
    see
    :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py`.
    For a comparison of different encoders, refer to
    :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py`. Read
    more in the :ref:`User Guide <target_encoder>`.

    .. note::
        `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a
        :term:`cross fitting` scheme is used in `fit_transform` for encoding.
        See the :ref:`User Guide <target_encoder>` for details.

    .. versionadded:: 1.3

    Parameters
    ----------
    categories : "auto" or list of shape (n_features,) of array-like, default="auto"
        Categories (unique values) per feature:

        - `"auto"` : Determine categories automatically from the training data.
        - list : `categories[i]` holds the categories expected in the i-th column. The
          passed categories should not mix strings and numeric values within a single
          feature, and should be sorted in case of numeric values.

        The used categories are stored in the `categories_` fitted attribute.

    target_type : {"auto", "continuous", "binary", "multiclass"}, default="auto"
        Type of target.

        - `"auto"` : Type of target is inferred with
          :func:`~sklearn.utils.multiclass.type_of_target`.
        - `"continuous"` : Continuous target
        - `"binary"` : Binary target
        - `"multiclass"` : Multiclass target

        .. note::
            The type of target inferred with `"auto"` may not be the desired target
            type used for modeling. For example, if the target consisted of integers
            between 0 and 100, then :func:`~sklearn.utils.multiclass.type_of_target`
            will infer the target as `"multiclass"`. In this case, setting
            `target_type="continuous"` will specify the target as a regression
            problem. The `target_type_` attribute gives the target type used by the
            encoder.

        .. versionchanged:: 1.4
           Added the option 'multiclass'.

    smooth : "auto" or float, default="auto"
        The amount of mixing of the target mean conditioned on the value of the
        category with the global target mean. A larger `smooth` value will put
        more weight on the global target mean.
        If `"auto"`, then `smooth` is set to an empirical Bayes estimate.

    cv : int, default=5
        Determines the number of folds in the :term:`cross fitting` strategy used in
        :meth:`fit_transform`. For classification targets, `StratifiedKFold` is used
        and for continuous targets, `KFold` is used.

    shuffle : bool, default=True
        Whether to shuffle the data in :meth:`fit_transform` before splitting into
        folds. Note that the samples within each split will not be shuffled.

    random_state : int, RandomState instance or None, default=None
        When `shuffle` is True, `random_state` affects the ordering of the
        indices, which controls the randomness of each fold. Otherwise, this
        parameter has no effect.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    encodings_ : list of shape (n_features,) or (n_features * n_classes) of \
ndarray
        Encodings learnt on all of `X`.
        For feature `i`, `encodings_[i]` are the encodings matching the
        categories listed in `categories_[i]`. When `target_type_` is
        "multiclass", the encoding for feature `i` and class `j` is stored in
        `encodings_[j + (i * len(classes_))]`. E.g., for 2 features (f) and
        3 classes (c), encodings are ordered:
        f0_c0, f0_c1, f0_c2, f1_c0, f1_c1, f1_c2,

    categories_ : list of shape (n_features,) of ndarray
        The categories of each input feature determined during fitting or
        specified in `categories`
        (in order of the features in `X` and corresponding with the output
        of :meth:`transform`).

    target_type_ : str
        Type of target.

    target_mean_ : float
        The overall mean of the target. This value is only used in :meth:`transform`
        to encode categories.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    classes_ : ndarray or None
        If `target_type_` is 'binary' or 'multiclass', holds the label for each class,
        otherwise `None`.

    See Also
    --------
    OrdinalEncoder : Performs an ordinal (integer) encoding of the categorical features.
        Contrary to TargetEncoder, this encoding is not supervised. Treating the
        resulting encoding as a numerical feature therefore leads to arbitrarily
        ordered values and typically to lower predictive performance
        when used as preprocessing for a classifier or regressor.
    OneHotEncoder : Performs a one-hot encoding of categorical features. This
        unsupervised encoding is better suited for low cardinality categorical
        variables as it generates one new feature per unique category.

    References
    ----------
    .. [MIC] :doi:`Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality
       categorical attributes in classification and prediction problems"
       SIGKDD Explor. Newsl. 3, 1 (July 2001), 27-32. <10.1145/507533.507538>`

    Examples
    --------
    With `smooth="auto"`, the smoothing parameter is set to an empirical Bayes estimate:

    >>> import numpy as np
    >>> from sklearn.preprocessing import TargetEncoder
    >>> X = np.array([["dog"] * 20 + ["cat"] * 30 + ["snake"] * 38], dtype=object).T
    >>> y = [90.3] * 5 + [80.1] * 15 + [20.4] * 5 + [20.1] * 25 + [21.2] * 8 + [49] * 30
    >>> enc_auto = TargetEncoder(smooth="auto")
    >>> X_trans = enc_auto.fit_transform(X, y)
    >>> # A high `smooth` parameter puts more weight on global mean on the categorical
    >>> # encodings:
    >>> enc_high_smooth = TargetEncoder(smooth=5000.0).fit(X, y)
    >>> enc_high_smooth.target_mean_
    np.float64(44.3)
    >>> enc_high_smooth.encodings_
    [array([44.1, 44.4, 44.3])]
    >>> # On the other hand, a low `smooth` parameter puts more weight on target
    >>> # conditioned on the value of the categorical:
    >>> enc_low_smooth = TargetEncoder(smooth=1.0).fit(X, y)
    >>> enc_low_smooth.encodings_
    [array([21, 80.8, 43.2])]
    """

    # Declarative parameter constraints checked by scikit-learn's validation
    # machinery (triggered through the `_fit_context` decorator below).
    _parameter_constraints: dict = {
        "categories": [StrOptions({"auto"}), list],
        "target_type": [StrOptions({"auto", "continuous", "binary", "multiclass"})],
        "smooth": [StrOptions({"auto"}), Interval(Real, 0, None, closed="left")],
        "cv": [Interval(Integral, 2, None, closed="left")],
        "shuffle": ["boolean"],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        categories="auto",
        target_type="auto",
        smooth="auto",
        cv=5,
        shuffle=True,
        random_state=None,
    ):
        self.categories = categories
        self.smooth = smooth
        self.target_type = target_type
        self.cv = cv
        self.shuffle = shuffle
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y):
        """Fit the :class:`TargetEncoder` to X and y.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to determine the categories of each feature.

        y : array-like of shape (n_samples,)
            The target data used to encode the categories.

        Returns
        -------
        self : object
            Fitted encoder.
        """
        self._fit_encodings_all(X, y)
        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def fit_transform(self, X, y):
        """Fit :class:`TargetEncoder` and transform X with the target encoding.

        .. note::
            `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a
            :term:`cross fitting` scheme is used in `fit_transform` for encoding.
            See the :ref:`User Guide <target_encoder>` for details.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to determine the categories of each feature.

        y : array-like of shape (n_samples,)
            The target data used to encode the categories.

        Returns
        -------
        X_trans : ndarray of shape (n_samples, n_features) or \
(n_samples, (n_features * n_classes))
            Transformed input.
        """
        from ..model_selection import KFold, StratifiedKFold  # avoid circular import

        X_ordinal, X_known_mask, y_encoded, n_categories = self._fit_encodings_all(X, y)
        # The cv splitter is voluntarily restricted to *KFold to enforce non
        # overlapping validation folds, otherwise the fit_transform output will
        # not be well-specified.
        if self.target_type_ == "continuous":
            cv = KFold(self.cv, shuffle=self.shuffle, random_state=self.random_state)
        else:
            cv = StratifiedKFold(
                self.cv, shuffle=self.shuffle, random_state=self.random_state
            )
        # If 'multiclass' multiply axis=1 by num classes else keep shape the same
        if self.target_type_ == "multiclass":
            X_out = np.empty(
                (X_ordinal.shape[0], X_ordinal.shape[1] * len(self.classes_)),
                dtype=np.float64,
            )
        else:
            X_out = np.empty_like(X_ordinal, dtype=np.float64)
        # Cross fitting: encodings learnt on each training fold are applied
        # only to the corresponding held-out fold, so no row is encoded with
        # statistics computed from its own target value.
        for train_idx, test_idx in cv.split(X, y):
            X_train, y_train = X_ordinal[train_idx, :], y_encoded[train_idx]
            y_train_mean = np.mean(y_train, axis=0)
            if self.target_type_ == "multiclass":
                encodings = self._fit_encoding_multiclass(
                    X_train,
                    y_train,
                    n_categories,
                    y_train_mean,
                )
            else:
                encodings = self._fit_encoding_binary_or_continuous(
                    X_train,
                    y_train,
                    n_categories,
                    y_train_mean,
                )
            self._transform_X_ordinal(
                X_out,
                X_ordinal,
                ~X_known_mask,
                test_idx,
                encodings,
                y_train_mean,
            )
        return X_out

    def transform(self, X):
        """Transform X with the target encoding.

        .. note::
            `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a
            :term:`cross fitting` scheme is used in `fit_transform` for encoding.
            See the :ref:`User Guide <target_encoder>` for details.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to determine the categories of each feature.

        Returns
        -------
        X_trans : ndarray of shape (n_samples, n_features) or \
(n_samples, (n_features * n_classes))
            Transformed input.
        """
        X_ordinal, X_known_mask = self._transform(
            X, handle_unknown="ignore", ensure_all_finite="allow-nan"
        )
        # If 'multiclass' multiply axis=1 by num of classes else keep shape the same
        if self.target_type_ == "multiclass":
            X_out = np.empty(
                (X_ordinal.shape[0], X_ordinal.shape[1] * len(self.classes_)),
                dtype=np.float64,
            )
        else:
            X_out = np.empty_like(X_ordinal, dtype=np.float64)
        # Unlike fit_transform, all rows are encoded with the full-data
        # encodings learnt in fit (`encodings_` / `target_mean_`).
        self._transform_X_ordinal(
            X_out,
            X_ordinal,
            ~X_known_mask,
            slice(None),
            self.encodings_,
            self.target_mean_,
        )
        return X_out

    def _fit_encodings_all(self, X, y):
        """Fit a target encoding with all the data.

        Returns the ordinal-encoded X, the known-category mask, the encoded
        target, and the per-feature category counts so that `fit_transform`
        can reuse them for its per-fold encodings.
        """
        # avoid circular import
        from ..preprocessing import (
            LabelBinarizer,
            LabelEncoder,
        )

        check_consistent_length(X, y)
        self._fit(X, handle_unknown="ignore", ensure_all_finite="allow-nan")
        if self.target_type == "auto":
            accepted_target_types = ("binary", "multiclass", "continuous")
            inferred_type_of_target = type_of_target(y, input_name="y")
            if inferred_type_of_target not in accepted_target_types:
                raise ValueError(
                    "Unknown label type: Target type was inferred to be "
                    f"{inferred_type_of_target!r}. Only {accepted_target_types} are "
                    "supported."
                )
            self.target_type_ = inferred_type_of_target
        else:
            self.target_type_ = self.target_type
        self.classes_ = None
        if self.target_type_ == "binary":
            # Binary targets become {0, 1} so their mean is P(y == classes_[1]).
            label_encoder = LabelEncoder()
            y = label_encoder.fit_transform(y)
            self.classes_ = label_encoder.classes_
        elif self.target_type_ == "multiclass":
            # One-vs-all binarization: y becomes (n_samples, n_classes).
            label_binarizer = LabelBinarizer()
            y = label_binarizer.fit_transform(y)
            self.classes_ = label_binarizer.classes_
        else:  # continuous
            y = _check_y(y, y_numeric=True, estimator=self)
        self.target_mean_ = np.mean(y, axis=0)
        X_ordinal, X_known_mask = self._transform(
            X, handle_unknown="ignore", ensure_all_finite="allow-nan"
        )
        n_categories = np.fromiter(
            (len(category_for_feature) for category_for_feature in self.categories_),
            dtype=np.int64,
            count=len(self.categories_),
        )
        if self.target_type_ == "multiclass":
            encodings = self._fit_encoding_multiclass(
                X_ordinal,
                y,
                n_categories,
                self.target_mean_,
            )
        else:
            encodings = self._fit_encoding_binary_or_continuous(
                X_ordinal,
                y,
                n_categories,
                self.target_mean_,
            )
        self.encodings_ = encodings
        return X_ordinal, X_known_mask, y, n_categories

    def _fit_encoding_binary_or_continuous(
        self, X_ordinal, y, n_categories, target_mean
    ):
        """Learn target encodings.

        Dispatches to the Cython helpers: empirical-Bayes smoothing when
        `smooth="auto"`, fixed-`smooth` shrinkage otherwise.
        """
        if self.smooth == "auto":
            y_variance = np.var(y)
            encodings = _fit_encoding_fast_auto_smooth(
                X_ordinal,
                y,
                n_categories,
                target_mean,
                y_variance,
            )
        else:
            encodings = _fit_encoding_fast(
                X_ordinal,
                y,
                n_categories,
                self.smooth,
                target_mean,
            )
        return encodings

    def _fit_encoding_multiclass(self, X_ordinal, y, n_categories, target_mean):
        """Learn multiclass encodings.

        Learn encodings for each class (c) then reorder encodings such that
        the same features (f) are grouped together. `reorder_index` enables
        converting from:
        f0_c0, f1_c0, f0_c1, f1_c1, f0_c2, f1_c2
        to:
        f0_c0, f0_c1, f0_c2, f1_c0, f1_c1, f1_c2
        """
        n_features = self.n_features_in_
        n_classes = len(self.classes_)
        encodings = []
        for i in range(n_classes):
            # One binary encoding problem per one-vs-all column of y.
            y_class = y[:, i]
            encoding = self._fit_encoding_binary_or_continuous(
                X_ordinal,
                y_class,
                n_categories,
                target_mean[i],
            )
            encodings.extend(encoding)
        reorder_index = (
            idx
            for start in range(n_features)
            for idx in range(start, (n_classes * n_features), n_features)
        )
        return [encodings[idx] for idx in reorder_index]

    def _transform_X_ordinal(
        self,
        X_out,
        X_ordinal,
        X_unknown_mask,
        row_indices,
        encodings,
        target_mean,
    ):
        """Transform X_ordinal using encodings.

        In the multiclass case, `X_ordinal` and `X_unknown_mask` have column
        (axis=1) size `n_features`, while `encodings` has length of size
        `n_features * n_classes`. `feat_idx` deals with this by repeating
        feature indices by `n_classes` E.g., for 3 features, 2 classes:
        0,0,1,1,2,2

        Additionally, `target_mean` is of shape (`n_classes`,) so `mean_idx`
        cycles through 0 to `n_classes` - 1, `n_features` times.
        """
        if self.target_type_ == "multiclass":
            n_classes = len(self.classes_)
            for e_idx, encoding in enumerate(encodings):
                # Repeat feature indices by n_classes
                feat_idx = e_idx // n_classes
                # Cycle through each class
                mean_idx = e_idx % n_classes
                X_out[row_indices, e_idx] = encoding[X_ordinal[row_indices, feat_idx]]
                # Unknown categories fall back to the (per-class) target mean.
                X_out[X_unknown_mask[:, feat_idx], e_idx] = target_mean[mean_idx]
        else:
            for e_idx, encoding in enumerate(encodings):
                X_out[row_indices, e_idx] = encoding[X_ordinal[row_indices, e_idx]]
                X_out[X_unknown_mask[:, e_idx], e_idx] = target_mean

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names. `feature_names_in_` is used unless it is
            not defined, in which case the following input feature names are
            generated: `["x0", "x1", ..., "x(n_features_in_ - 1)"]`.
            When `type_of_target_` is "multiclass" the names are of the format
            '<feature_name>_<class_name>'.
        """
        check_is_fitted(self, "n_features_in_")
        feature_names = _check_feature_names_in(self, input_features)
        if self.target_type_ == "multiclass":
            feature_names = [
                f"{feature_name}_{class_name}"
                for feature_name in feature_names
                for class_name in self.classes_
            ]
            return np.asarray(feature_names, dtype=object)
        else:
            return feature_names

    def __sklearn_tags__(self):
        """Return estimator tags; this encoder requires a target `y` to fit."""
        tags = super().__sklearn_tags__()
        tags.target_tags.required = True
        return tags

View File

@@ -0,0 +1,167 @@
from libc.math cimport isnan
from libcpp.vector cimport vector
from ..utils._typedefs cimport float32_t, float64_t, int32_t, int64_t
import numpy as np
ctypedef fused INT_DTYPE:
int64_t
int32_t
ctypedef fused Y_DTYPE:
int64_t
int32_t
float64_t
float32_t
def _fit_encoding_fast(
    INT_DTYPE[:, ::1] X_int,
    const Y_DTYPE[:] y,
    int64_t[::1] n_categories,
    double smooth,
    double y_mean,
):
    """Fit a target encoding on X_int and y.

    This implementation uses Eq 7 from [1] to compute the encoding.
    As stated in the paper, Eq 7 is the same as Eq 3.

    The fixed `smooth` acts as a pseudo-count of `smooth` observations at
    `y_mean` added to each category before averaging.

    [1]: Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality
    categorical attributes in classification and prediction problems"
    """
    cdef:
        int64_t sample_idx, feat_idx, cat_idx, n_cats
        INT_DTYPE X_int_tmp
        int n_samples = X_int.shape[0]
        int n_features = X_int.shape[1]
        double smooth_sum = smooth * y_mean
        # Scratch buffers sized for the widest feature, reused per feature.
        int64_t max_n_cats = np.max(n_categories)
        double[::1] sums = np.empty(max_n_cats, dtype=np.float64)
        double[::1] counts = np.empty(max_n_cats, dtype=np.float64)
        list encodings = []
        double[::1] current_encoding
        # Gives access to encodings without gil
        vector[double*] encoding_vec

    encoding_vec.resize(n_features)
    for feat_idx in range(n_features):
        # Allocate the per-feature output array; keep both a raw pointer
        # (for nogil writes) and the ndarray view returned to the caller.
        current_encoding = np.empty(shape=n_categories[feat_idx], dtype=np.float64)
        encoding_vec[feat_idx] = &current_encoding[0]
        encodings.append(np.asarray(current_encoding))

    with nogil:
        for feat_idx in range(n_features):
            n_cats = n_categories[feat_idx]

            # Seed accumulators with the smoothing pseudo-counts.
            for cat_idx in range(n_cats):
                sums[cat_idx] = smooth_sum
                counts[cat_idx] = smooth

            for sample_idx in range(n_samples):
                X_int_tmp = X_int[sample_idx, feat_idx]
                # -1 are unknown categories, which are not counted
                if X_int_tmp == -1:
                    continue
                sums[X_int_tmp] += y[sample_idx]
                counts[X_int_tmp] += 1.0

            for cat_idx in range(n_cats):
                # counts can only be 0 when smooth == 0 and the category is
                # absent from X_int; fall back to the global mean then.
                if counts[cat_idx] == 0:
                    encoding_vec[feat_idx][cat_idx] = y_mean
                else:
                    encoding_vec[feat_idx][cat_idx] = sums[cat_idx] / counts[cat_idx]
    return encodings
def _fit_encoding_fast_auto_smooth(
    INT_DTYPE[:, ::1] X_int,
    const Y_DTYPE[:] y,
    int64_t[::1] n_categories,
    double y_mean,
    double y_variance,
):
    """Fit a target encoding on X_int and y with auto smoothing.

    This implementation uses Eq 5 and 6 from [1]: each category's encoding is
    `lambda * category_mean + (1 - lambda) * y_mean`, where the shrinkage
    factor `lambda` is derived from the between/within-category variances.

    [1]: Micci-Barreca, Daniele. "A preprocessing scheme for high-cardinality
    categorical attributes in classification and prediction problems"
    """
    cdef:
        int64_t sample_idx, feat_idx, cat_idx, n_cats
        INT_DTYPE X_int_tmp
        double diff
        int n_samples = X_int.shape[0]
        int n_features = X_int.shape[1]
        # Scratch buffers sized for the widest feature, reused per feature.
        int64_t max_n_cats = np.max(n_categories)
        double[::1] means = np.empty(max_n_cats, dtype=np.float64)
        int64_t[::1] counts = np.empty(max_n_cats, dtype=np.int64)
        double[::1] sum_of_squared_diffs = np.empty(max_n_cats, dtype=np.float64)
        double lambda_
        list encodings = []
        double[::1] current_encoding
        # Gives access to encodings without gil
        vector[double*] encoding_vec

    encoding_vec.resize(n_features)
    for feat_idx in range(n_features):
        # Allocate the per-feature output array; keep both a raw pointer
        # (for nogil writes) and the ndarray view returned to the caller.
        current_encoding = np.empty(shape=n_categories[feat_idx], dtype=np.float64)
        encoding_vec[feat_idx] = &current_encoding[0]
        encodings.append(np.asarray(current_encoding))

    # TODO: parallelize this with OpenMP prange. When n_features >= n_threads, it's
    # probably good to parallelize the outer loop. When n_features is too small,
    # then it would probably better to parallelize the nested loops on n_samples and
    # n_cats, but the code to handle thread-local temporary variables might be
    # significantly more complex.
    with nogil:
        for feat_idx in range(n_features):
            n_cats = n_categories[feat_idx]

            for cat_idx in range(n_cats):
                means[cat_idx] = 0.0
                counts[cat_idx] = 0
                sum_of_squared_diffs[cat_idx] = 0.0

            # first pass to compute the mean
            for sample_idx in range(n_samples):
                X_int_tmp = X_int[sample_idx, feat_idx]
                # -1 are unknown categories, which are not counted
                if X_int_tmp == -1:
                    continue
                counts[X_int_tmp] += 1
                means[X_int_tmp] += y[sample_idx]

            for cat_idx in range(n_cats):
                # An unseen category gives 0/0 = nan here; handled via the
                # isnan check on lambda_ below.
                means[cat_idx] /= counts[cat_idx]

            # second pass to compute the sum of squared differences
            for sample_idx in range(n_samples):
                X_int_tmp = X_int[sample_idx, feat_idx]
                if X_int_tmp == -1:
                    continue
                diff = y[sample_idx] - means[X_int_tmp]
                sum_of_squared_diffs[X_int_tmp] += diff * diff

            for cat_idx in range(n_cats):
                # Eq 5/6: shrink the category mean towards the global mean in
                # proportion to how noisy the within-category estimate is.
                lambda_ = (
                    y_variance * counts[cat_idx] /
                    (y_variance * counts[cat_idx] + sum_of_squared_diffs[cat_idx] /
                     counts[cat_idx])
                )
                if isnan(lambda_):
                    # A nan can happen when:
                    # 1. counts[cat_idx] == 0
                    # 2. y_variance == 0 and sum_of_squared_diffs[cat_idx] == 0
                    encoding_vec[feat_idx][cat_idx] = y_mean
                else:
                    encoding_vec[feat_idx][cat_idx] = (
                        lambda_ * means[cat_idx] + (1 - lambda_) * y_mean
                    )
    return encodings

View File

@@ -0,0 +1,13 @@
# Compiled extension modules for sklearn.preprocessing.
py.extension_module(
  '_csr_polynomial_expansion',
  [cython_gen.process('_csr_polynomial_expansion.pyx'), utils_cython_tree],
  subdir: 'sklearn/preprocessing',
  install: true
)

# Built with the C++ Cython generator: the .pyx uses libcpp (std::vector).
py.extension_module(
  '_target_encoder_fast',
  [cython_gen_cpp.process('_target_encoder_fast.pyx'), utils_cython_tree],
  subdir: 'sklearn/preprocessing',
  install: true
)

View File

@@ -0,0 +1,187 @@
import warnings
import numpy as np
import pytest
from sklearn.base import clone
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import (
MaxAbsScaler,
MinMaxScaler,
PowerTransformer,
QuantileTransformer,
RobustScaler,
StandardScaler,
maxabs_scale,
minmax_scale,
power_transform,
quantile_transform,
robust_scale,
scale,
)
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.fixes import (
BSR_CONTAINERS,
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DIA_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
# Shared fixture: the iris dataset, loaded once at module import time.
iris = load_iris()
def _get_valid_samples_by_column(X, col):
"""Get non NaN samples in column of X"""
return X[:, [col]][~np.isnan(X[:, col])]
@pytest.mark.parametrize(
    "est, func, support_sparse, strictly_positive, omit_kwargs",
    [
        (MaxAbsScaler(), maxabs_scale, True, False, []),
        (MinMaxScaler(), minmax_scale, False, False, ["clip"]),
        (StandardScaler(), scale, False, False, []),
        (StandardScaler(with_mean=False), scale, True, False, []),
        (PowerTransformer("yeo-johnson"), power_transform, False, False, []),
        (PowerTransformer("box-cox"), power_transform, False, True, []),
        (QuantileTransformer(n_quantiles=10), quantile_transform, True, False, []),
        (RobustScaler(), robust_scale, False, False, []),
        (RobustScaler(with_centering=False), robust_scale, True, False, []),
    ],
)
def test_missing_value_handling(
    est, func, support_sparse, strictly_positive, omit_kwargs
):
    """Check that each scaler/transformer passes NaN through untouched.

    Verifies: NaNs in, NaNs out (and only them); the functional API matches
    the estimator API; inverse_transform preserves NaN; per-column fitting on
    NaN-free data still transforms NaN inputs; and sparse inputs (when
    supported) agree with dense results.
    """
    # check that the preprocessing method let pass nan
    rng = np.random.RandomState(42)
    X = iris.data.copy()
    n_missing = 50
    # Scatter NaNs at random (row, col) positions.
    X[
        rng.randint(X.shape[0], size=n_missing), rng.randint(X.shape[1], size=n_missing)
    ] = np.nan
    if strictly_positive:
        # box-cox requires strictly positive input.
        X += np.nanmin(X) + 0.1
    X_train, X_test = train_test_split(X, random_state=1)
    # sanity check
    assert not np.all(np.isnan(X_train), axis=0).any()
    assert np.any(np.isnan(X_train), axis=0).all()
    assert np.any(np.isnan(X_test), axis=0).all()
    X_test[:, 0] = np.nan  # make sure this boundary case is tested
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        Xt = est.fit(X_train).transform(X_test)
    # ensure no warnings are raised
    # missing values should still be missing, and only them
    assert_array_equal(np.isnan(Xt), np.isnan(X_test))
    # check that the function leads to the same results as the class
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        Xt_class = est.transform(X_train)
    kwargs = est.get_params()
    # remove the parameters which should be omitted because they
    # are not defined in the counterpart function of the preprocessing class
    for kwarg in omit_kwargs:
        _ = kwargs.pop(kwarg)
    Xt_func = func(X_train, **kwargs)
    assert_array_equal(np.isnan(Xt_func), np.isnan(Xt_class))
    assert_allclose(Xt_func[~np.isnan(Xt_func)], Xt_class[~np.isnan(Xt_class)])
    # check that the inverse transform keep NaN
    Xt_inv = est.inverse_transform(Xt)
    assert_array_equal(np.isnan(Xt_inv), np.isnan(X_test))
    # FIXME: we can introduce equal_nan=True in recent version of numpy.
    # For the moment which just check that non-NaN values are almost equal.
    assert_allclose(Xt_inv[~np.isnan(Xt_inv)], X_test[~np.isnan(X_test)])
    for i in range(X.shape[1]):
        # train only on non-NaN
        est.fit(_get_valid_samples_by_column(X_train, i))
        # check transforming with NaN works even when training without NaN
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            Xt_col = est.transform(X_test[:, [i]])
        assert_allclose(Xt_col, Xt[:, [i]])
        # check non-NaN is handled as before - the 1st column is all nan
        if not np.isnan(X_test[:, i]).all():
            Xt_col_nonan = est.transform(_get_valid_samples_by_column(X_test, i))
            assert_array_equal(Xt_col_nonan, Xt_col[~np.isnan(Xt_col.squeeze())])
    if support_sparse:
        est_dense = clone(est)
        est_sparse = clone(est)
        with warnings.catch_warnings():
            warnings.simplefilter("error", RuntimeWarning)
            Xt_dense = est_dense.fit(X_train).transform(X_test)
            Xt_inv_dense = est_dense.inverse_transform(Xt_dense)
        for sparse_container in (
            BSR_CONTAINERS
            + COO_CONTAINERS
            + CSC_CONTAINERS
            + CSR_CONTAINERS
            + DIA_CONTAINERS
            + DOK_CONTAINERS
            + LIL_CONTAINERS
        ):
            # check that the dense and sparse inputs lead to the same results
            # precompute the matrix to avoid catching side warnings
            X_train_sp = sparse_container(X_train)
            X_test_sp = sparse_container(X_test)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PendingDeprecationWarning)
                warnings.simplefilter("error", RuntimeWarning)
                Xt_sp = est_sparse.fit(X_train_sp).transform(X_test_sp)
            assert_allclose(Xt_sp.toarray(), Xt_dense)
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", PendingDeprecationWarning)
                warnings.simplefilter("error", RuntimeWarning)
                Xt_inv_sp = est_sparse.inverse_transform(Xt_sp)
            assert_allclose(Xt_inv_sp.toarray(), Xt_inv_dense)
@pytest.mark.parametrize(
    "est, func",
    [
        (MaxAbsScaler(), maxabs_scale),
        (MinMaxScaler(), minmax_scale),
        (StandardScaler(), scale),
        (StandardScaler(with_mean=False), scale),
        (PowerTransformer("yeo-johnson"), power_transform),
        (
            PowerTransformer("box-cox"),
            power_transform,
        ),
        (QuantileTransformer(n_quantiles=3), quantile_transform),
        (RobustScaler(), robust_scale),
        (RobustScaler(with_centering=False), robust_scale),
    ],
)
def test_missing_value_pandas_na_support(est, func):
    """Check that pandas nullable-integer columns (pd.NA) transform like
    the equivalent NumPy array with np.nan."""
    # Test pandas IntegerArray with pd.NA
    pd = pytest.importorskip("pandas")
    X = np.array(
        [
            [1, 2, 3, np.nan, np.nan, 4, 5, 1],
            [np.nan, np.nan, 8, 4, 6, np.nan, np.nan, 8],
            [1, 2, 3, 4, 5, 6, 7, 8],
        ]
    ).T
    # Creates dataframe with IntegerArrays with pd.NA
    X_df = pd.DataFrame(X, dtype="Int16", columns=["a", "b", "c"])
    # Column "c" has no missing values, so it can hold a plain int dtype.
    X_df["c"] = X_df["c"].astype("int")
    X_trans = est.fit_transform(X)
    X_df_trans = est.fit_transform(X_df)
    assert_allclose(X_trans, X_df_trans)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,665 @@
import warnings
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn import clone
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder
from sklearn.utils._testing import (
assert_allclose,
assert_allclose_dense_sparse,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
# Shared 4x4 fixture (4 samples, 4 features) used by the discretizer tests.
X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]]
@pytest.mark.parametrize(
    "strategy, quantile_method, expected, sample_weight",
    [
        (
            "uniform",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [1, 1, 1, 0], [2, 2, 2, 1], [2, 2, 2, 2]],
            None,
        ),
        (
            "kmeans",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]],
            None,
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]],
            None,
        ),
        (
            "uniform",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [1, 1, 1, 0], [2, 2, 2, 1], [2, 2, 2, 2]],
            [1, 1, 2, 1],
        ),
        (
            "uniform",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [1, 1, 1, 0], [2, 2, 2, 1], [2, 2, 2, 2]],
            [1, 1, 1, 1],
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]],
            [1, 1, 2, 1],
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]],
            [1, 1, 1, 1],
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]],
            [0, 1, 1, 1],
        ),
        (
            "kmeans",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [1, 1, 1, 0], [1, 1, 1, 1], [2, 2, 2, 2]],
            [1, 0, 3, 1],
        ),
        (
            "kmeans",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [2, 2, 2, 2]],
            [1, 1, 1, 1],
        ),
    ],
)
def test_fit_transform(strategy, quantile_method, expected, sample_weight):
    """Check ordinal-encoded bin assignments for each binning strategy,
    with and without sample weights."""
    est = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy=strategy, quantile_method=quantile_method
    )
    with ignore_warnings(category=UserWarning):
        # Ignore the warning on removed small bins.
        est.fit(X, sample_weight=sample_weight)
    assert_array_equal(est.transform(X), expected)
def test_valid_n_bins():
    """Check that valid `n_bins` values (Python int and NumPy integer scalar)
    are accepted and that the fitted `n_bins_` has integer dtype."""
    KBinsDiscretizer(n_bins=2, quantile_method="averaged_inverted_cdf").fit_transform(X)
    # A NumPy integer scalar must work like a plain int.
    KBinsDiscretizer(
        n_bins=np.array([2])[0], quantile_method="averaged_inverted_cdf"
    ).fit_transform(X)
    assert KBinsDiscretizer(n_bins=2, quantile_method="averaged_inverted_cdf").fit(
        X
    ).n_bins_.dtype == np.dtype(int)
def test_invalid_n_bins_array():
    """Check the error messages raised for malformed `n_bins` arrays."""
    # Bad shape
    n_bins = np.full((2, 4), 2.0)
    est = KBinsDiscretizer(n_bins=n_bins, quantile_method="averaged_inverted_cdf")
    err_msg = r"n_bins must be a scalar or array of shape \(n_features,\)."
    with pytest.raises(ValueError, match=err_msg):
        est.fit_transform(X)
    # Incorrect number of features
    n_bins = [1, 2, 2]
    est = KBinsDiscretizer(n_bins=n_bins, quantile_method="averaged_inverted_cdf")
    err_msg = r"n_bins must be a scalar or array of shape \(n_features,\)."
    with pytest.raises(ValueError, match=err_msg):
        est.fit_transform(X)
    # Bad bin values
    n_bins = [1, 2, 2, 1]
    est = KBinsDiscretizer(n_bins=n_bins, quantile_method="averaged_inverted_cdf")
    err_msg = (
        "KBinsDiscretizer received an invalid number of bins "
        "at indices 0, 3. Number of bins must be at least 2, "
        "and must be an int."
    )
    with pytest.raises(ValueError, match=err_msg):
        est.fit_transform(X)
    # Float bin values
    n_bins = [2.1, 2, 2.1, 2]
    est = KBinsDiscretizer(n_bins=n_bins, quantile_method="averaged_inverted_cdf")
    err_msg = (
        "KBinsDiscretizer received an invalid number of bins "
        "at indices 0, 2. Number of bins must be at least 2, "
        "and must be an int."
    )
    with pytest.raises(ValueError, match=err_msg):
        est.fit_transform(X)
@pytest.mark.parametrize(
    "strategy, quantile_method, expected, sample_weight",
    [
        (
            "uniform",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [0, 1, 1, 0], [1, 2, 2, 1], [1, 2, 2, 2]],
            None,
        ),
        (
            "kmeans",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 2, 2, 2]],
            None,
        ),
        (
            "quantile",
            "linear",
            [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]],
            None,
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]],
            None,
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [0, 1, 1, 1], [1, 2, 2, 2], [1, 2, 2, 2]],
            [1, 1, 1, 1],
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]],
            [0, 1, 3, 1],
        ),
        (
            "quantile",
            "averaged_inverted_cdf",
            [[0, 0, 0, 0], [0, 0, 0, 0], [1, 2, 2, 2], [1, 2, 2, 2]],
            [1, 1, 3, 1],
        ),
        (
            "kmeans",
            "warn",  # default, will not warn when strategy != "quantile"
            [[0, 0, 0, 0], [0, 1, 1, 0], [1, 1, 1, 1], [1, 2, 2, 2]],
            [1, 0, 3, 1],
        ),
    ],
)
def test_fit_transform_n_bins_array(strategy, quantile_method, expected, sample_weight):
    """Check per-feature `n_bins` arrays: bin assignments and the shape of the
    fitted `bin_edges_` for each strategy."""
    est = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3],
        encode="ordinal",
        strategy=strategy,
        quantile_method=quantile_method,
    ).fit(X, sample_weight=sample_weight)
    assert_array_equal(est.transform(X), expected)
    # test the shape of bin_edges_
    n_features = np.array(X).shape[1]
    assert est.bin_edges_.shape == (n_features,)
    for bin_edges, n_bins in zip(est.bin_edges_, est.n_bins_):
        # Each feature gets n_bins + 1 edges.
        assert bin_edges.shape == (n_bins + 1,)
@pytest.mark.filterwarnings("ignore: Bins whose width are too small")
def test_kbinsdiscretizer_effect_sample_weight():
    """Check the impact of `sample_weight` on computed quantiles."""
    X = np.array([[-2], [-1], [1], [3], [500], [1000]])
    # add a large number of bins such that each sample with a non-null weight
    # will be used as bin edge
    est = KBinsDiscretizer(
        n_bins=10,
        encode="ordinal",
        strategy="quantile",
        quantile_method="averaged_inverted_cdf",
    )
    est.fit(X, sample_weight=[1, 1, 1, 1, 0, 0])
    # The zero-weight samples (500, 1000) contribute no bin edge.
    assert_allclose(est.bin_edges_[0], [-2, -1, 0, 1, 3])
    assert_allclose(est.transform(X), [[0.0], [1.0], [3.0], [3.0], [3.0], [3.0]])
@pytest.mark.parametrize("strategy", ["kmeans", "quantile"])
def test_kbinsdiscretizer_no_mutating_sample_weight(strategy):
    """Make sure that `sample_weight` is not changed in place."""
    extra_params = {}
    if strategy == "quantile":
        # quantile strategy needs an explicit quantile_method to avoid the
        # FutureWarning about the changing default.
        extra_params["quantile_method"] = "averaged_inverted_cdf"
    est = KBinsDiscretizer(
        n_bins=3, encode="ordinal", strategy=strategy, **extra_params
    )
    sample_weight = np.array([1, 3, 1, 2], dtype=np.float64)
    original_weights = sample_weight.copy()
    est.fit(X, sample_weight=sample_weight)
    # The weights passed in must be byte-for-byte untouched after fitting.
    assert_allclose(sample_weight, original_weights)
@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"])
def test_same_min_max(strategy):
    """A constant feature collapses to a single bin and transforms to zero."""
    X = np.array([[1, -2], [1, -1], [1, 0], [1, 1]])
    if strategy == "quantile":
        est = KBinsDiscretizer(
            strategy=strategy,
            n_bins=3,
            encode="ordinal",
            quantile_method="averaged_inverted_cdf",
        )
    else:
        est = KBinsDiscretizer(strategy=strategy, n_bins=3, encode="ordinal")
    warning_message = "Feature 0 is constant and will be replaced with 0."
    # Scope the "always" filter so it does not leak into other tests: the
    # original bare warnings.simplefilter("always") mutated process-wide
    # warning state for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("always")
        with pytest.warns(UserWarning, match=warning_message):
            est.fit(X)
    # The constant feature is assigned a single bin.
    assert est.n_bins_[0] == 1
    # replace the feature with zeros
    Xt = est.transform(X)
    assert_array_equal(Xt[:, 0], np.zeros(X.shape[0]))
def test_transform_1d_behavior():
    """1-D input must be rejected by both fit and transform."""
    X = np.arange(4)
    # Fitting directly on a 1-D array is invalid.
    with pytest.raises(ValueError):
        KBinsDiscretizer(n_bins=2, quantile_method="averaged_inverted_cdf").fit(X)
    # Even after fitting on a valid 2-D reshape, transforming 1-D data fails.
    est = KBinsDiscretizer(n_bins=2, quantile_method="averaged_inverted_cdf")
    est.fit(X.reshape(-1, 1))
    with pytest.raises(ValueError):
        est.transform(X)
@pytest.mark.parametrize("i", range(1, 9))
def test_numeric_stability(i):
    """Binning is scale-invariant down to nano-scale magnitudes."""
    X_init = np.array([2.0, 4.0, 6.0, 8.0, 10.0]).reshape(-1, 1)
    expected_codes = np.array([0, 0, 1, 1, 1]).reshape(-1, 1)
    # Shrink the data by a power of ten; the bin assignment must not change.
    X_scaled = X_init / 10**i
    est = KBinsDiscretizer(
        n_bins=2, encode="ordinal", quantile_method="averaged_inverted_cdf"
    )
    assert_array_equal(expected_codes, est.fit_transform(X_scaled))
def test_encode_options():
    """The three encodings (ordinal / onehot-dense / onehot) agree pairwise."""
    est = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode="ordinal", quantile_method="averaged_inverted_cdf"
    ).fit(X)
    Xt_1 = est.transform(X)
    est = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3],
        encode="onehot-dense",
        quantile_method="averaged_inverted_cdf",
    ).fit(X)
    Xt_2 = est.transform(X)
    assert not sp.issparse(Xt_2)
    # onehot-dense output equals one-hot encoding of the ordinal codes.
    assert_array_equal(
        OneHotEncoder(
            categories=[np.arange(i) for i in [2, 3, 3, 3]], sparse_output=False
        ).fit_transform(Xt_1),
        Xt_2,
    )
    est = KBinsDiscretizer(
        n_bins=[2, 3, 3, 3], encode="onehot", quantile_method="averaged_inverted_cdf"
    ).fit(X)
    Xt_3 = est.transform(X)
    assert sp.issparse(Xt_3)
    # sparse onehot output matches the dense one-hot of the ordinal codes.
    assert_array_equal(
        OneHotEncoder(
            categories=[np.arange(i) for i in [2, 3, 3, 3]], sparse_output=True
        )
        .fit_transform(Xt_1)
        .toarray(),
        Xt_3.toarray(),
    )
@pytest.mark.parametrize(
    "strategy, quantile_method, expected_2bins, expected_3bins, expected_5bins",
    [
        ("uniform", "warn", [0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 2, 2], [0, 0, 1, 1, 4, 4]),
        ("kmeans", "warn", [0, 0, 0, 0, 1, 1], [0, 0, 1, 1, 2, 2], [0, 0, 1, 2, 3, 4]),
        (
            "quantile",
            "averaged_inverted_cdf",
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 1, 2, 2],
            [0, 1, 2, 3, 4, 4],
        ),
    ],
)
def test_nonuniform_strategies(
    strategy, quantile_method, expected_2bins, expected_3bins, expected_5bins
):
    """Each strategy bins the same skewed data in its own expected way."""
    X = np.array([0, 0.5, 2, 3, 9, 10]).reshape(-1, 1)
    # with 2 bins
    est = KBinsDiscretizer(
        n_bins=2, strategy=strategy, quantile_method=quantile_method, encode="ordinal"
    )
    Xt = est.fit_transform(X)
    assert_array_equal(expected_2bins, Xt.ravel())
    # with 3 bins
    est = KBinsDiscretizer(
        n_bins=3, strategy=strategy, quantile_method=quantile_method, encode="ordinal"
    )
    Xt = est.fit_transform(X)
    assert_array_equal(expected_3bins, Xt.ravel())
    # with 5 bins
    est = KBinsDiscretizer(
        n_bins=5, strategy=strategy, quantile_method=quantile_method, encode="ordinal"
    )
    Xt = est.fit_transform(X)
    assert_array_equal(expected_5bins, Xt.ravel())
@pytest.mark.parametrize(
    "strategy, expected_inv,quantile_method",
    [
        (
            "uniform",
            [
                [-1.5, 2.0, -3.5, -0.5],
                [-0.5, 3.0, -2.5, -0.5],
                [0.5, 4.0, -1.5, 0.5],
                [0.5, 4.0, -1.5, 1.5],
            ],
            "warn",  # default, will not warn when strategy != "quantile"
        ),
        (
            "kmeans",
            [
                [-1.375, 2.125, -3.375, -0.5625],
                [-1.375, 2.125, -3.375, -0.5625],
                [-0.125, 3.375, -2.125, 0.5625],
                [0.75, 4.25, -1.25, 1.625],
            ],
            "warn",  # default, will not warn when strategy != "quantile"
        ),
        (
            "quantile",
            [
                [-1.5, 2.0, -3.5, -0.75],
                [-0.5, 3.0, -2.5, 0.0],
                [0.5, 4.0, -1.5, 1.25],
                [0.5, 4.0, -1.5, 1.25],
            ],
            "averaged_inverted_cdf",
        ),
    ],
)
@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"])
def test_inverse_transform(strategy, encode, expected_inv, quantile_method):
    """inverse_transform maps codes back to the per-strategy bin centers."""
    kbd = KBinsDiscretizer(
        n_bins=3, strategy=strategy, quantile_method=quantile_method, encode=encode
    )
    Xt = kbd.fit_transform(X)
    Xinv = kbd.inverse_transform(Xt)
    assert_array_almost_equal(expected_inv, Xinv)
@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"])
def test_transform_outside_fit_range(strategy):
    """Values outside the fitted range are clipped into the edge bins."""
    X = np.array([0, 1, 2, 3])[:, None]
    params = dict(n_bins=4, strategy=strategy, encode="ordinal")
    if strategy == "quantile":
        # Silence the changing-default FutureWarning for the quantile strategy.
        params["quantile_method"] = "averaged_inverted_cdf"
    kbd = KBinsDiscretizer(**params)
    kbd.fit(X)
    # One value below the fitted range, one above it.
    X_outside = np.array([-2, 5])[:, None]
    Xt_outside = kbd.transform(X_outside)
    # The out-of-range values fall into the last and first bins respectively.
    assert_array_equal(Xt_outside.max(axis=0) + 1, kbd.n_bins_)
    assert_array_equal(Xt_outside.min(axis=0), [0])
def test_overwrite():
    """fit_transform and inverse_transform must not mutate their inputs."""
    X = np.array([0, 1, 2, 3])[:, None]
    X_original = X.copy()
    est = KBinsDiscretizer(
        n_bins=3, quantile_method="averaged_inverted_cdf", encode="ordinal"
    )
    Xt = est.fit_transform(X)
    # The raw input is untouched by fit_transform.
    assert_array_equal(X, X_original)
    Xt_original = Xt.copy()
    Xinv = est.inverse_transform(Xt)
    # The transformed codes are untouched by inverse_transform.
    assert_array_equal(Xt, Xt_original)
    # The inverse maps each code to the center of its bin.
    assert_array_equal(Xinv, np.array([[0.5], [1.5], [2.5], [2.5]]))
@pytest.mark.parametrize(
    "strategy, expected_bin_edges, quantile_method",
    [
        ("quantile", [0, 1.5, 3], "averaged_inverted_cdf"),
        ("kmeans", [0, 1.5, 3], "warn"),
    ],
)
def test_redundant_bins(strategy, expected_bin_edges, quantile_method):
    """Duplicate-heavy data collapses redundant bins with a warning."""
    X = [[0], [0], [0], [0], [3], [3]]
    kbd = KBinsDiscretizer(
        n_bins=3, strategy=strategy, quantile_method=quantile_method, subsample=None
    )
    warning_message = "Consider decreasing the number of bins."
    with pytest.warns(UserWarning, match=warning_message):
        kbd.fit(X)
    # Only two effective bins remain after removing duplicate edges.
    assert_array_almost_equal(kbd.bin_edges_[0], expected_bin_edges)
def test_percentile_numeric_stability():
    """Percentile-based edges stay numerically stable on near-degenerate data."""
    X = np.array([0.05, 0.05, 0.95]).reshape(-1, 1)
    bin_edges = np.array([0.05, 0.23, 0.41, 0.59, 0.77, 0.95])
    Xt = np.array([0, 0, 4]).reshape(-1, 1)
    kbd = KBinsDiscretizer(
        n_bins=10,
        encode="ordinal",
        strategy="quantile",
        quantile_method="linear",
    )
    ## TODO: change to averaged inverted cdf, but that means we only get bin
    ## edges of 0.05 and 0.95 and nothing in between
    # Redundant-bin warning is expected: 10 bins on 3 samples.
    warning_message = "Consider decreasing the number of bins."
    with pytest.warns(UserWarning, match=warning_message):
        kbd.fit(X)
    assert_array_almost_equal(kbd.bin_edges_[0], bin_edges)
    assert_array_almost_equal(kbd.transform(X), Xt)
@pytest.mark.parametrize("in_dtype", [np.float16, np.float32, np.float64])
@pytest.mark.parametrize("out_dtype", [None, np.float32, np.float64])
@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"])
def test_consistent_dtype(in_dtype, out_dtype, encode):
    """Output dtype follows the `dtype` parameter, falling back to the input."""
    X_input = np.array(X, dtype=in_dtype)
    kbd = KBinsDiscretizer(
        n_bins=3,
        encode=encode,
        quantile_method="averaged_inverted_cdf",
        dtype=out_dtype,
    )
    kbd.fit(X_input)
    # test output dtype
    if out_dtype is not None:
        expected_dtype = out_dtype
    elif out_dtype is None and X_input.dtype == np.float16:
        # wrong numeric input dtype are cast in np.float64
        expected_dtype = np.float64
    else:
        expected_dtype = X_input.dtype
    Xt = kbd.transform(X_input)
    assert Xt.dtype == expected_dtype
@pytest.mark.parametrize("input_dtype", [np.float16, np.float32, np.float64])
@pytest.mark.parametrize("encode", ["ordinal", "onehot", "onehot-dense"])
def test_32_equal_64(input_dtype, encode):
    """float32 and float64 output dtypes produce numerically equal results."""
    # TODO this check is redundant with common checks and can be removed
    # once #16290 is merged
    X_input = np.array(X, dtype=input_dtype)
    outputs = []
    for output_dtype in (np.float32, np.float64):
        kbd = KBinsDiscretizer(
            n_bins=3,
            encode=encode,
            quantile_method="averaged_inverted_cdf",
            dtype=output_dtype,
        )
        kbd.fit(X_input)
        outputs.append(kbd.transform(X_input))
    # 32-bit and 64-bit outputs must match (dense or sparse).
    assert_allclose_dense_sparse(outputs[0], outputs[1])
def test_kbinsdiscretizer_subsample_default():
    """With fewer than 2e5 samples, the default subsample has no effect."""
    X = np.array([-2, 1.5, -4, -1]).reshape(-1, 1)
    kbd_default = KBinsDiscretizer(
        n_bins=10,
        encode="ordinal",
        strategy="quantile",
        quantile_method="averaged_inverted_cdf",
    )
    kbd_default.fit(X)
    kbd_without_subsampling = clone(kbd_default)
    kbd_without_subsampling.set_params(subsample=None)
    kbd_without_subsampling.fit(X)
    # Edges computed with and without subsampling must match element-wise.
    assert_allclose(kbd_default.bin_edges_[0], kbd_without_subsampling.bin_edges_[0])
    assert kbd_default.bin_edges_.shape == kbd_without_subsampling.bin_edges_.shape
@pytest.mark.parametrize(
    "encode, expected_names",
    [
        (
            "onehot",
            [
                f"feat{col_id}_{float(bin_id)}"
                for col_id in range(3)
                for bin_id in range(4)
            ],
        ),
        (
            "onehot-dense",
            [
                f"feat{col_id}_{float(bin_id)}"
                for col_id in range(3)
                for bin_id in range(4)
            ],
        ),
        ("ordinal", [f"feat{col_id}" for col_id in range(3)]),
    ],
)
# NOTE(review): "discrtizer" in the name is a typo; kept to preserve the
# public test identifier.
def test_kbinsdiscrtizer_get_feature_names_out(encode, expected_names):
    """Check get_feature_names_out for different settings.
    Non-regression test for #22731
    """
    X = [[-2, 1, -4], [-1, 2, -3], [0, 3, -2], [1, 4, -1]]
    kbd = KBinsDiscretizer(
        n_bins=4, encode=encode, quantile_method="averaged_inverted_cdf"
    ).fit(X)
    Xt = kbd.transform(X)
    input_features = [f"feat{i}" for i in range(3)]
    output_names = kbd.get_feature_names_out(input_features)
    # One output name per transformed column.
    assert Xt.shape[1] == output_names.shape[0]
    assert_array_equal(output_names, expected_names)
@pytest.mark.parametrize("strategy", ["uniform", "kmeans", "quantile"])
def test_kbinsdiscretizer_subsample(strategy, global_random_seed):
    """Bin edges with subsampling are close to those computed on all data."""
    # Check that the bin edges are almost the same when subsampling is used.
    X = np.random.RandomState(global_random_seed).random_sample((100000, 1)) + 1
    if strategy == "quantile":
        kbd_subsampling = KBinsDiscretizer(
            strategy=strategy,
            subsample=50000,
            random_state=global_random_seed,
            quantile_method="averaged_inverted_cdf",
        )
    else:
        kbd_subsampling = KBinsDiscretizer(
            strategy=strategy, subsample=50000, random_state=global_random_seed
        )
    kbd_subsampling.fit(X)
    kbd_no_subsampling = clone(kbd_subsampling)
    kbd_no_subsampling.set_params(subsample=None)
    kbd_no_subsampling.fit(X)
    # We use a large tolerance because we can't expect the bin edges to be exactly the
    # same when subsampling is used.
    assert_allclose(
        kbd_subsampling.bin_edges_[0], kbd_no_subsampling.bin_edges_[0], rtol=1e-2
    )
def test_quantile_method_future_warnings():
    """The default quantile_method warns about the upcoming 1.9 change."""
    X = [[-2, 1, -4], [-1, 2, -3], [0, 3, -2], [1, 4, -1]]
    expected_warning = (
        "The current default behavior, quantile_method='linear', will be "
        "changed to quantile_method='averaged_inverted_cdf' in "
        "scikit-learn version 1.9 to naturally support sample weight "
        "equivalence properties by default. Pass "
        "quantile_method='averaged_inverted_cdf' explicitly to silence this "
        "warning."
    )
    with pytest.warns(FutureWarning, match=expected_warning):
        KBinsDiscretizer(strategy="quantile").fit(X)
def test_invalid_quantile_method_with_sample_weight():
    """quantile_method='linear' is rejected when sample weights are given."""
    X = [[-2, 1, -4], [-1, 2, -3], [0, 3, -2], [1, 4, -1]]
    expected_msg = (
        "When fitting with strategy='quantile' and sample weights, "
        "quantile_method should either be set to 'averaged_inverted_cdf' or "
        "'inverted_cdf', got quantile_method='linear' instead."
    )
    est = KBinsDiscretizer(strategy="quantile", quantile_method="linear")
    with pytest.raises(ValueError, match=expected_msg):
        est.fit(X, sample_weight=[1, 1, 2, 2])

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,579 @@
import warnings
import numpy as np
import pytest
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.utils._testing import (
_convert_container,
assert_allclose_dense_sparse,
assert_array_equal,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
def _make_func(args_store, kwargs_store, func=lambda X, *a, **k: X):
def _func(X, *args, **kwargs):
args_store.append(X)
args_store.extend(args)
kwargs_store.update(kwargs)
return func(X)
return _func
def test_delegate_to_func():
    """transform forwards only X (no extra args/kwargs) to the user's func."""
    # (args|kwargs)_store will hold the positional and keyword arguments
    # passed to the function inside the FunctionTransformer.
    args_store = []
    kwargs_store = {}
    X = np.arange(10).reshape((5, 2))
    assert_array_equal(
        FunctionTransformer(_make_func(args_store, kwargs_store)).transform(X),
        X,
        "transform should have returned X unchanged",
    )
    # The function should only have received X.
    assert args_store == [X], (
        "Incorrect positional arguments passed to func: {args}".format(args=args_store)
    )
    assert not kwargs_store, (
        "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store)
    )
    # reset the argument stores.
    args_store[:] = []
    kwargs_store.clear()
    transformed = FunctionTransformer(
        _make_func(args_store, kwargs_store),
    ).transform(X)
    assert_array_equal(
        transformed, X, err_msg="transform should have returned X unchanged"
    )
    # The function should have received X
    assert args_store == [X], (
        "Incorrect positional arguments passed to func: {args}".format(args=args_store)
    )
    assert not kwargs_store, (
        "Unexpected keyword arguments passed to func: {args}".format(args=kwargs_store)
    )
def test_np_log():
    """FunctionTransformer with np.log1p matches calling np.log1p directly."""
    X = np.arange(10).reshape((5, 2))
    transformer = FunctionTransformer(np.log1p)
    assert_array_equal(transformer.transform(X), np.log1p(X))
def test_kw_arg():
    """kw_args supplied at construction are forwarded to func on transform."""
    X = np.linspace(0, 1, num=10).reshape((5, 2))
    transformer = FunctionTransformer(np.around, kw_args={"decimals": 3})
    assert_array_equal(transformer.transform(X), np.around(X, decimals=3))
def test_kw_arg_update():
    """Mutating kw_args in place after construction affects later transforms."""
    X = np.linspace(0, 1, num=10).reshape((5, 2))
    transformer = FunctionTransformer(np.around, kw_args={"decimals": 3})
    transformer.kw_args["decimals"] = 1
    assert_array_equal(transformer.transform(X), np.around(X, decimals=1))
def test_kw_arg_reset():
    """Reassigning kw_args entirely replaces the previous settings."""
    X = np.linspace(0, 1, num=10).reshape((5, 2))
    transformer = FunctionTransformer(np.around, kw_args={"decimals": 3})
    transformer.kw_args = {"decimals": 1}
    assert_array_equal(transformer.transform(X), np.around(X, decimals=1))
def test_inverse_transform():
    """inverse_transform applies inverse_func with inv_kw_args."""
    X = np.array([1, 4, 9, 16]).reshape((2, 2))
    transformer = FunctionTransformer(
        func=np.sqrt,
        inverse_func=np.around,
        inv_kw_args={"decimals": 3},
    )
    # sqrt forward, then around(..., decimals=3) on the way back.
    round_trip = transformer.inverse_transform(transformer.transform(X))
    assert_array_equal(round_trip, np.around(np.sqrt(X), decimals=3))
@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS + CSR_CONTAINERS)
def test_check_inverse(sparse_container):
    """check_inverse warns for non-inverse pairs and is silent otherwise."""
    X = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    if sparse_container is not None:
        X = sparse_container(X)
    # sqrt and around are not inverses of each other: expect a warning.
    trans = FunctionTransformer(
        func=np.sqrt,
        inverse_func=np.around,
        accept_sparse=sparse_container is not None,
        check_inverse=True,
        validate=True,
    )
    warning_message = (
        "The provided functions are not strictly"
        " inverse of each other. If you are sure you"
        " want to proceed regardless, set"
        " 'check_inverse=False'."
    )
    with pytest.warns(UserWarning, match=warning_message):
        trans.fit(X)
    # expm1 and log1p are true inverses: fitting must raise no warning.
    trans = FunctionTransformer(
        func=np.expm1,
        inverse_func=np.log1p,
        accept_sparse=sparse_container is not None,
        check_inverse=True,
        validate=True,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        Xt = trans.fit_transform(X)
    assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))
def test_check_inverse_func_or_inverse_not_provided():
    """No inverse check runs when either func or inverse_func is missing."""
    X = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    for forward, backward in [(np.expm1, None), (None, np.expm1)]:
        trans = FunctionTransformer(
            func=forward, inverse_func=backward, check_inverse=True, validate=True
        )
        # Escalate UserWarning to an error: fitting must stay silent.
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            trans.fit(X)
def test_function_transformer_frame():
    """The identity FunctionTransformer preserves pandas DataFrames."""
    pd = pytest.importorskip("pandas")
    X_df = pd.DataFrame(np.random.randn(100, 10))
    result = FunctionTransformer().fit_transform(X_df)
    # A DataFrame (exposes .loc) should come back, not a converted ndarray.
    assert hasattr(result, "loc")
@pytest.mark.parametrize("X_type", ["array", "series"])
def test_function_transformer_raise_error_with_mixed_dtype(X_type):
    """Check that `FunctionTransformer.check_inverse` raises error on mixed dtype."""
    mapping = {"one": 1, "two": 2, "three": 3, 5: "five", 6: "six"}
    inverse_mapping = {value: key for key, value in mapping.items()}
    dtype = "object"
    data = ["one", "two", "three", "one", "one", 5, 6]
    data = _convert_container(data, X_type, columns_name=["value"], dtype=dtype)
    def func(X):
        # Forward map: element-wise lookup, output stays object dtype.
        return np.array([mapping[X[i]] for i in range(X.size)], dtype=object)
    def inverse_func(X):
        # Backward map through the inverted dictionary.
        return _convert_container(
            [inverse_mapping[x] for x in X],
            X_type,
            columns_name=["value"],
            dtype=dtype,
        )
    transformer = FunctionTransformer(
        func=func, inverse_func=inverse_func, validate=False, check_inverse=True
    )
    msg = "'check_inverse' is only supported when all the elements in `X` is numerical."
    with pytest.raises(ValueError, match=msg):
        transformer.fit(data)
# NOTE(review): "nummerical" in the name is a typo; kept to preserve the
# public test identifier.
def test_function_transformer_support_all_nummerical_dataframes_check_inverse_True():
    """Check support for dataframes with only numerical values."""
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
    transformer = FunctionTransformer(
        func=lambda x: x + 2, inverse_func=lambda x: x - 2, check_inverse=True
    )
    # Does not raise an error
    df_out = transformer.fit_transform(df)
    assert_allclose_dense_sparse(df_out, df + 2)
def test_function_transformer_with_dataframe_and_check_inverse_True():
    """Check an error is raised for mixed-dtype frames when check_inverse=True.
    Non-regression test for gh-25261.
    """
    pd = pytest.importorskip("pandas")
    transformer = FunctionTransformer(
        func=lambda x: x, inverse_func=lambda x: x, check_inverse=True
    )
    # Column "b" is non-numerical, which check_inverse cannot handle.
    df_mixed = pd.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
    msg = "'check_inverse' is only supported when all the elements in `X` is numerical."
    with pytest.raises(ValueError, match=msg):
        transformer.fit(df_mixed)
@pytest.mark.parametrize(
    "X, feature_names_out, input_features, expected",
    [
        (
            # NumPy inputs, default behavior: generate names
            np.random.rand(100, 3),
            "one-to-one",
            None,
            ("x0", "x1", "x2"),
        ),
        (
            # Pandas input, default behavior: use input feature names
            {"a": np.random.rand(100), "b": np.random.rand(100)},
            "one-to-one",
            None,
            ("a", "b"),
        ),
        (
            # NumPy input, feature_names_out=callable
            np.random.rand(100, 3),
            lambda transformer, input_features: ("a", "b"),
            None,
            ("a", "b"),
        ),
        (
            # Pandas input, feature_names_out=callable
            {"a": np.random.rand(100), "b": np.random.rand(100)},
            lambda transformer, input_features: ("c", "d", "e"),
            None,
            ("c", "d", "e"),
        ),
        (
            # NumPy input, feature_names_out=callable default input_features
            np.random.rand(100, 3),
            lambda transformer, input_features: tuple(input_features) + ("a",),
            None,
            ("x0", "x1", "x2", "a"),
        ),
        (
            # Pandas input, feature_names_out=callable default input_features
            {"a": np.random.rand(100), "b": np.random.rand(100)},
            lambda transformer, input_features: tuple(input_features) + ("c",),
            None,
            ("a", "b", "c"),
        ),
        (
            # NumPy input, input_features=list of names
            np.random.rand(100, 3),
            "one-to-one",
            ("a", "b", "c"),
            ("a", "b", "c"),
        ),
        (
            # Pandas input, input_features=list of names
            {"a": np.random.rand(100), "b": np.random.rand(100)},
            "one-to-one",
            ("a", "b"),  # must match feature_names_in_
            ("a", "b"),
        ),
        (
            # NumPy input, feature_names_out=callable, input_features=list
            np.random.rand(100, 3),
            lambda transformer, input_features: tuple(input_features) + ("d",),
            ("a", "b", "c"),
            ("a", "b", "c", "d"),
        ),
        (
            # Pandas input, feature_names_out=callable, input_features=list
            {"a": np.random.rand(100), "b": np.random.rand(100)},
            lambda transformer, input_features: tuple(input_features) + ("c",),
            ("a", "b"),  # must match feature_names_in_
            ("a", "b", "c"),
        ),
    ],
)
@pytest.mark.parametrize("validate", [True, False])
def test_function_transformer_get_feature_names_out(
    X, feature_names_out, input_features, expected, validate
):
    """get_feature_names_out covers one-to-one/callable x array/frame inputs."""
    # Dict cases stand in for pandas DataFrames (built lazily to allow skip).
    if isinstance(X, dict):
        pd = pytest.importorskip("pandas")
        X = pd.DataFrame(X)
    transformer = FunctionTransformer(
        feature_names_out=feature_names_out, validate=validate
    )
    transformer.fit(X)
    names = transformer.get_feature_names_out(input_features)
    # Names are always returned as an object ndarray.
    assert isinstance(names, np.ndarray)
    assert names.dtype == object
    assert_array_equal(names, expected)
def test_function_transformer_get_feature_names_out_without_validation():
    """Feature names pass through one-to-one even when validate=False."""
    transformer = FunctionTransformer(feature_names_out="one-to-one", validate=False)
    transformer.fit_transform(np.random.rand(100, 2))
    names = transformer.get_feature_names_out(("a", "b"))
    # Names come back as an object ndarray mirroring the given input names.
    assert isinstance(names, np.ndarray)
    assert names.dtype == object
    assert_array_equal(names, ("a", "b"))
def test_function_transformer_feature_names_out_is_None():
    """get_feature_names_out is unavailable when feature_names_out is None."""
    transformer = FunctionTransformer()
    transformer.fit_transform(np.random.rand(100, 2))
    expected_msg = "This 'FunctionTransformer' has no attribute 'get_feature_names_out'"
    with pytest.raises(AttributeError, match=expected_msg):
        transformer.get_feature_names_out()
def test_function_transformer_feature_names_out_uses_estimator():
    """A feature_names_out callable can read state from the transformer."""
    def add_n_random_features(X, n):
        # Append `n` random columns to X.
        return np.concatenate([X, np.random.rand(len(X), n)], axis=1)
    def feature_names_out(transformer, input_features):
        # Derive the number of generated names from the transformer's kw_args.
        n = transformer.kw_args["n"]
        return list(input_features) + [f"rnd{i}" for i in range(n)]
    transformer = FunctionTransformer(
        func=add_n_random_features,
        feature_names_out=feature_names_out,
        kw_args=dict(n=3),
        validate=True,
    )
    pd = pytest.importorskip("pandas")
    df = pd.DataFrame({"a": np.random.rand(100), "b": np.random.rand(100)})
    transformer.fit_transform(df)
    names = transformer.get_feature_names_out()
    assert isinstance(names, np.ndarray)
    assert names.dtype == object
    assert_array_equal(names, ("a", "b", "rnd0", "rnd1", "rnd2"))
def test_function_transformer_validate_inverse():
    """Test that function transformer does not reset estimator in
    `inverse_transform`."""
    def add_constant_feature(X):
        # Append a column of ones, so the output has one extra feature.
        X_one = np.ones((X.shape[0], 1))
        return np.concatenate((X, X_one), axis=1)
    def inverse_add_constant(X):
        # Drop the appended constant column.
        return X[:, :-1]
    X = np.array([[1, 2], [3, 4], [3, 4]])
    trans = FunctionTransformer(
        func=add_constant_feature,
        inverse_func=inverse_add_constant,
        validate=True,
    )
    X_trans = trans.fit_transform(X)
    assert trans.n_features_in_ == X.shape[1]
    # inverse_transform sees X.shape[1] + 1 columns but must not overwrite
    # the fitted n_features_in_.
    trans.inverse_transform(X_trans)
    assert trans.n_features_in_ == X.shape[1]
@pytest.mark.parametrize(
    "feature_names_out, expected",
    [
        ("one-to-one", ["pet", "color"]),
        [lambda est, names: [f"{n}_out" for n in names], ["pet_out", "color_out"]],
    ],
)
@pytest.mark.parametrize("in_pipeline", [True, False])
def test_get_feature_names_out_dataframe_with_string_data(
    feature_names_out, expected, in_pipeline
):
    """Check that get_feature_names_out works with DataFrames with string data."""
    pd = pytest.importorskip("pandas")
    X = pd.DataFrame({"pet": ["dog", "cat"], "color": ["red", "green"]})
    def func(X):
        # Rename columns so the output matches what feature_names_out reports.
        if feature_names_out == "one-to-one":
            return X
        else:
            name = feature_names_out(None, X.columns)
            return X.rename(columns=dict(zip(X.columns, name)))
    transformer = FunctionTransformer(func=func, feature_names_out=feature_names_out)
    if in_pipeline:
        transformer = make_pipeline(transformer)
    X_trans = transformer.fit_transform(X)
    assert isinstance(X_trans, pd.DataFrame)
    names = transformer.get_feature_names_out()
    assert isinstance(names, np.ndarray)
    assert names.dtype == object
    assert_array_equal(names, expected)
def test_set_output_func():
    """Check behavior of set_output with different settings."""
    pd = pytest.importorskip("pandas")
    X = pd.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
    ft = FunctionTransformer(np.log, feature_names_out="one-to-one")
    # no warning is raised when feature_names_out is defined
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        ft.set_output(transform="pandas")
        X_trans = ft.fit_transform(X)
    assert isinstance(X_trans, pd.DataFrame)
    assert_array_equal(X_trans.columns, ["a", "b"])
    ft = FunctionTransformer(lambda x: 2 * x)
    ft.set_output(transform="pandas")
    # no warning is raised when func returns a panda dataframe
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        X_trans = ft.fit_transform(X)
    assert isinstance(X_trans, pd.DataFrame)
    assert_array_equal(X_trans.columns, ["a", "b"])
    # Warning is raised when func returns a ndarray
    ft_np = FunctionTransformer(lambda x: np.asarray(x))
    for transform in ("pandas", "polars"):
        ft_np.set_output(transform=transform)
        msg = (
            f"When `set_output` is configured to be '{transform}'.*{transform} "
            "DataFrame.*"
        )
        with pytest.warns(UserWarning, match=msg):
            ft_np.fit_transform(X)
    # default transform does not warn
    ft_np.set_output(transform="default")
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        ft_np.fit_transform(X)
def test_consistence_column_name_between_steps():
    """Check consistency between the feature names produced by
    `FunctionTransformer` and the feature names expected by the next step in
    the pipeline.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27695
    """
    pd = pytest.importorskip("pandas")
    def with_suffix(_, names):
        # Rename every feature with a "__log" suffix.
        return [name + "__log" for name in names]
    pipeline = make_pipeline(
        FunctionTransformer(np.log1p, feature_names_out=with_suffix), StandardScaler()
    )
    df = pd.DataFrame([[1, 2], [3, 4], [5, 6]], columns=["a", "b"])
    X_trans = pipeline.fit_transform(df)
    assert pipeline.get_feature_names_out().tolist() == ["a__log", "b__log"]
    # StandardScaler will convert to a numpy array
    assert isinstance(X_trans, np.ndarray)
@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"])
@pytest.mark.parametrize("transform_output", ["default", "pandas", "polars"])
def test_function_transformer_overwrite_column_names(dataframe_lib, transform_output):
    """Check that we overwrite the column names when we should."""
    lib = pytest.importorskip(dataframe_lib)
    # Only skip when the output is backed by an actual dataframe library.
    # The previous check compared against "numpy" (a value never
    # parametrized), so importorskip("default") was invoked and silently
    # skipped every transform_output="default" case.
    if transform_output != "default":
        pytest.importorskip(transform_output)
    df = lib.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
    def with_suffix(_, names):
        # Rename every feature with a "__log" suffix.
        return [name + "__log" for name in names]
    transformer = FunctionTransformer(feature_names_out=with_suffix).set_output(
        transform=transform_output
    )
    X_trans = transformer.fit_transform(df)
    assert_array_equal(np.asarray(X_trans), np.asarray(df))
    # The output columns and get_feature_names_out must both carry the suffix.
    feature_names = transformer.get_feature_names_out()
    assert list(X_trans.columns) == with_suffix(None, df.columns)
    assert feature_names.tolist() == with_suffix(None, df.columns)
@pytest.mark.parametrize(
    "feature_names_out",
    ["one-to-one", lambda _, names: [f"{name}_log" for name in names]],
)
def test_function_transformer_overwrite_column_names_numerical(feature_names_out):
    """Check the same as `test_function_transformer_overwrite_column_names`
    but for the specific case of pandas where column names can be numerical."""
    pd = pytest.importorskip("pandas")
    # Columns are integers, not strings.
    df = pd.DataFrame({0: [1, 2, 3], 1: [10, 20, 100]})
    transformer = FunctionTransformer(feature_names_out=feature_names_out)
    X_trans = transformer.fit_transform(df)
    assert_array_equal(np.asarray(X_trans), np.asarray(df))
    # Output column labels must agree with get_feature_names_out.
    feature_names = transformer.get_feature_names_out()
    assert list(X_trans.columns) == list(feature_names)
@pytest.mark.parametrize("dataframe_lib", ["pandas", "polars"])
@pytest.mark.parametrize(
    "feature_names_out",
    ["one-to-one", lambda _, names: [f"{name}_log" for name in names]],
)
def test_function_transformer_error_column_inconsistent(
    dataframe_lib, feature_names_out
):
    """Check that we raise an error when `func` returns a dataframe with new
    column names that become inconsistent with `get_feature_names_out`."""
    lib = pytest.importorskip(dataframe_lib)
    df = lib.DataFrame({"a": [1, 2, 3], "b": [10, 20, 100]})
    def func(df):
        # Rename a column so func's output disagrees with feature_names_out.
        if dataframe_lib == "pandas":
            return df.rename(columns={"a": "c"})
        else:
            return df.rename({"a": "c"})
    transformer = FunctionTransformer(func=func, feature_names_out=feature_names_out)
    err_msg = "The output generated by `func` have different column names"
    with pytest.raises(ValueError, match=err_msg):
        transformer.fit_transform(df).columns

View File

@@ -0,0 +1,748 @@
import numpy as np
import pytest
from scipy.sparse import issparse
from sklearn import config_context, datasets
from sklearn.preprocessing._label import (
LabelBinarizer,
LabelEncoder,
MultiLabelBinarizer,
_inverse_binarize_multiclass,
_inverse_binarize_thresholding,
label_binarize,
)
from sklearn.utils._array_api import (
_convert_to_numpy,
_get_namespace_device_dtype_ids,
get_namespace,
yield_namespace_device_dtype_combinations,
)
from sklearn.utils._testing import (
_array_api_for_tests,
assert_array_equal,
)
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.validation import _to_object_array
# Shared fixture: the iris dataset, loaded once at module import time.
iris = datasets.load_iris()
def toarray(a):
    """Return `a` densified: sparse inputs go through .toarray(), others pass."""
    return a.toarray() if hasattr(a, "toarray") else a
def test_label_binarizer():
    """Round-trip LabelBinarizer on one-, two- and multi-class dense/sparse."""
    # one-class case defaults to negative label
    # For dense case:
    inp = ["pos", "pos", "pos", "pos"]
    lb = LabelBinarizer(sparse_output=False)
    expected = np.array([[0, 0, 0, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
    # For sparse case:
    lb = LabelBinarizer(sparse_output=True)
    got = lb.fit_transform(inp)
    assert issparse(got)
    assert_array_equal(lb.classes_, ["pos"])
    assert_array_equal(expected, got.toarray())
    assert_array_equal(lb.inverse_transform(got.toarray()), inp)
    lb = LabelBinarizer(sparse_output=False)
    # two-class case
    inp = ["neg", "pos", "pos", "neg"]
    expected = np.array([[0, 1, 1, 0]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(lb.classes_, ["neg", "pos"])
    assert_array_equal(expected, got)
    # inverse_transform also accepts a full two-column indicator matrix.
    to_invert = np.array([[1, 0], [0, 1], [0, 1], [1, 0]])
    assert_array_equal(lb.inverse_transform(to_invert), inp)
    # multi-class case
    inp = ["spam", "ham", "eggs", "ham", "0"]
    expected = np.array(
        [[0, 0, 0, 1], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 1, 0], [1, 0, 0, 0]]
    )
    got = lb.fit_transform(inp)
    # Classes are stored in lexicographic order.
    assert_array_equal(lb.classes_, ["0", "eggs", "ham", "spam"])
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
def test_label_binarizer_unseen_labels():
    """Labels absent at fit time are encoded as all-zero rows by transform."""
    binarizer = LabelBinarizer()
    # Fitting on three distinct classes yields the 3x3 identity indicator.
    fitted = binarizer.fit_transform(["b", "d", "e"])
    assert_array_equal(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]), fitted)
    # Unseen labels ("a", "c", "f") map to rows of zeros.
    transformed = binarizer.transform(["a", "b", "c", "d", "e", "f"])
    expected_rows = np.array(
        [[0, 0, 0], [1, 0, 0], [0, 0, 0], [0, 1, 0], [0, 0, 1], [0, 0, 0]]
    )
    assert_array_equal(expected_rows, transformed)
def test_label_binarizer_set_label_encoding():
    """Custom neg_label/pos_label values appear verbatim in the binarized output."""
    lb = LabelBinarizer(neg_label=-2, pos_label=0)
    # two-class case with pos_label=0
    inp = np.array([0, 1, 1, 0])
    expected = np.array([[-2, 0, 0, -2]]).T
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
    lb = LabelBinarizer(neg_label=-2, pos_label=2)
    # multi-class case
    inp = np.array([3, 2, 1, 2, 0])
    expected = np.array(
        [
            [-2, -2, -2, +2],
            [-2, -2, +2, -2],
            [-2, +2, -2, -2],
            [-2, -2, +2, -2],
            [+2, -2, -2, -2],
        ]
    )
    got = lb.fit_transform(inp)
    assert_array_equal(expected, got)
    assert_array_equal(lb.inverse_transform(got), inp)
@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
@pytest.mark.parametrize("unique_first", [True, False])
def test_label_binarizer_pandas_nullable(dtype, unique_first):
    """Checks that LabelBinarizer works with pandas nullable dtypes.
    Non-regression test for gh-25637.
    """
    pd = pytest.importorskip("pandas")
    y_true = pd.Series([1, 0, 0, 1, 0, 1, 1, 0, 1], dtype=dtype)
    if unique_first:
        # Calling unique creates a pandas array which has a different interface
        # compared to a pandas Series. Specifically, pandas arrays do not have "iloc".
        y_true = y_true.unique()
    lb = LabelBinarizer().fit(y_true)
    # Binary target: a single output column with 1 for the positive class.
    y_out = lb.transform([1, 0])
    assert_array_equal(y_out, [[1], [0]])
def test_label_binarizer_errors():
    """Invalid inputs and parameter combinations raise informative errors."""
    # Check that invalid arguments yield ValueError
    one_class = np.array([0, 0, 0, 0])
    lb = LabelBinarizer().fit(one_class)
    # Legacy sequence-of-sequences multilabel format is rejected.
    multi_label = [(2, 3), (0,), (0, 2)]
    err_msg = "You appear to be using a legacy multi-label data representation."
    with pytest.raises(ValueError, match=err_msg):
        lb.transform(multi_label)
    # Using an unfitted estimator must raise (caught here as ValueError).
    lb = LabelBinarizer()
    err_msg = "This LabelBinarizer instance is not fitted yet"
    with pytest.raises(ValueError, match=err_msg):
        lb.transform([])
    with pytest.raises(ValueError, match=err_msg):
        lb.inverse_transform([])
    # neg_label must be strictly smaller than pos_label.
    input_labels = [0, 1, 0, 1]
    err_msg = "neg_label=2 must be strictly less than pos_label=1."
    lb = LabelBinarizer(neg_label=2, pos_label=1)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)
    err_msg = "neg_label=2 must be strictly less than pos_label=2."
    lb = LabelBinarizer(neg_label=2, pos_label=2)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)
    # Sparse output only supports neg_label == 0 with a nonzero pos_label.
    err_msg = (
        "Sparse binarization is only supported with non zero pos_label and zero "
        "neg_label, got pos_label=2 and neg_label=1"
    )
    lb = LabelBinarizer(neg_label=1, pos_label=2, sparse_output=True)
    with pytest.raises(ValueError, match=err_msg):
        lb.fit(input_labels)
    # Sequence of seq type should raise ValueError
    y_seq_of_seqs = [[], [1, 2], [3], [0, 1, 3], [2]]
    err_msg = "You appear to be using a legacy multi-label data representation"
    with pytest.raises(ValueError, match=err_msg):
        LabelBinarizer().fit_transform(y_seq_of_seqs)
    # Fail on the dimension of 'binary'
    err_msg = "output_type='binary', but y.shape"
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=np.array([[1, 2, 3], [2, 1, 3]]),
            output_type="binary",
            classes=[1, 2, 3],
            threshold=0,
        )
    # Fail on multioutput data
    err_msg = "Multioutput target data is not supported with label binarization"
    with pytest.raises(ValueError, match=err_msg):
        LabelBinarizer().fit(np.array([[1, 3], [2, 1]]))
    with pytest.raises(ValueError, match=err_msg):
        label_binarize(np.array([[1, 3], [2, 1]]), classes=[1, 2, 3])
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_label_binarizer_sparse_errors(csr_container):
    """_inverse_binarize_thresholding validates output_type and class count."""
    # Fail on y_type
    err_msg = "foo format is not supported"
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=csr_container([[1, 2], [2, 1]]),
            output_type="foo",
            classes=[1, 2],
            threshold=0,
        )
    # Fail on the number of classes
    # (3 classes vs 2 columns; this check fires before the format check)
    err_msg = "The number of class is not equal to the number of dimension of y."
    with pytest.raises(ValueError, match=err_msg):
        _inverse_binarize_thresholding(
            y=csr_container([[1, 2], [2, 1]]),
            output_type="foo",
            classes=[1, 2, 3],
            threshold=0,
        )
@pytest.mark.parametrize(
    "values, classes, unknown",
    [
        (
            np.array([2, 1, 3, 1, 3], dtype="int64"),
            np.array([1, 2, 3], dtype="int64"),
            np.array([4], dtype="int64"),
        ),
        (
            np.array(["b", "a", "c", "a", "c"], dtype=object),
            np.array(["a", "b", "c"], dtype=object),
            np.array(["d"], dtype=object),
        ),
        (
            np.array(["b", "a", "c", "a", "c"]),
            np.array(["a", "b", "c"]),
            np.array(["d"]),
        ),
    ],
    ids=["int64", "object", "str"],
)
def test_label_encoder(values, classes, unknown):
    """Fit/transform/inverse_transform round-trip across input dtypes."""
    # Test LabelEncoder's transform, fit_transform and
    # inverse_transform methods
    le = LabelEncoder()
    le.fit(values)
    assert_array_equal(le.classes_, classes)
    assert_array_equal(le.transform(values), [1, 0, 2, 0, 2])
    assert_array_equal(le.inverse_transform([1, 0, 2, 0, 2]), values)
    le = LabelEncoder()
    ret = le.fit_transform(values)
    assert_array_equal(ret, [1, 0, 2, 0, 2])
    # Values not present during fit are rejected.
    with pytest.raises(ValueError, match="unseen labels"):
        le.transform(unknown)
def test_label_encoder_negative_ints():
    """Negative integers sort into classes_ and round-trip correctly."""
    encoder = LabelEncoder().fit([1, 1, 4, 5, -1, 0])
    assert_array_equal(encoder.classes_, [-1, 0, 1, 4, 5])
    labels = [0, 1, 4, 4, 5, -1, -1]
    codes = [1, 2, 3, 3, 4, 0, 0]
    assert_array_equal(encoder.transform(labels), codes)
    assert_array_equal(encoder.inverse_transform(codes), labels)
    # A label never seen during fit must be rejected.
    with pytest.raises(ValueError):
        encoder.transform([0, 6])
@pytest.mark.parametrize("dtype", ["str", "object"])
def test_label_encoder_str_bad_shape(dtype):
    """Passing a bare string (0-d input) to transform raises a shape error."""
    encoder = LabelEncoder()
    encoder.fit(np.array(["apple", "orange"], dtype=dtype))
    with pytest.raises(ValueError, match="should be a 1d array"):
        encoder.transform("apple")
def test_label_encoder_errors():
    """Unfitted use, unseen labels and scalar input all raise ValueError."""
    # Check that invalid arguments yield ValueError
    le = LabelEncoder()
    with pytest.raises(ValueError):
        le.transform([])
    with pytest.raises(ValueError):
        le.inverse_transform([])
    # Fail on unseen labels
    le = LabelEncoder()
    le.fit([1, 2, 3, -1, 1])
    msg = "contains previously unseen labels"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2])
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform([-2, -3, -4])
    # Fail on inverse_transform("") — a 0-d input is reported with its shape.
    msg = r"should be a 1d array.+shape \(\)"
    with pytest.raises(ValueError, match=msg):
        le.inverse_transform("")
@pytest.mark.parametrize(
    "values",
    [
        np.array([2, 1, 3, 1, 3], dtype="int64"),
        np.array(["b", "a", "c", "a", "c"], dtype=object),
        np.array(["b", "a", "c", "a", "c"]),
    ],
    ids=["int64", "object", "str"],
)
def test_label_encoder_empty_array(values):
    """transform and inverse_transform on empty input return empty arrays."""
    encoder = LabelEncoder().fit(values)
    empty = np.array([])
    # Empty transform round-trips to an empty result.
    assert_array_equal(empty, encoder.transform([]))
    # Same for the inverse direction.
    assert_array_equal(empty, encoder.inverse_transform([]))
def test_sparse_output_multilabel_binarizer():
    """Sparse and dense MultiLabelBinarizer agree on iterable-of-iterables input."""
    # test input as iterable of iterables
    # (inputs are factories so one-shot iterators can be recreated per use)
    inputs = [
        lambda: [(2, 3), (1,), (1, 2)],
        lambda: ({2, 3}, {1}, {1, 2}),
        lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    inverse = inputs[0]()
    for sparse_output in [True, False]:
        for inp in inputs:
            # With fit_transform
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit_transform(inp())
            assert issparse(got) == sparse_output
            if sparse_output:
                # verify CSR assumption that indices and indptr have same dtype
                assert got.indices.dtype == got.indptr.dtype
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert mlb.inverse_transform(got) == inverse
            # With fit
            mlb = MultiLabelBinarizer(sparse_output=sparse_output)
            got = mlb.fit(inp()).transform(inp())
            assert issparse(got) == sparse_output
            if sparse_output:
                # verify CSR assumption that indices and indptr have same dtype
                assert got.indices.dtype == got.indptr.dtype
                got = got.toarray()
            assert_array_equal(indicator_mat, got)
            assert_array_equal([1, 2, 3], mlb.classes_)
            assert mlb.inverse_transform(got) == inverse
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse_output_multilabel_binarizer_errors(csr_container):
    """inverse_transform rejects sparse matrices with non-binary entries."""
    mlb = MultiLabelBinarizer(sparse_output=False)
    mlb.fit(iter([iter((2, 3)), iter((1,)), {1, 2}]))
    # The value 2 makes this indicator matrix non-binary.
    non_binary = csr_container(np.array([[0, 1, 1], [2, 0, 0], [1, 1, 0]]))
    with pytest.raises(ValueError):
        mlb.inverse_transform(non_binary)
def test_multilabel_binarizer():
    """MultiLabelBinarizer round-trips lists, sets and iterators of labels."""
    # test input as iterable of iterables
    # (inputs are factories so one-shot iterators can be recreated per use)
    inputs = [
        lambda: [(2, 3), (1,), (1, 2)],
        lambda: ({2, 3}, {1}, {1, 2}),
        lambda: iter([iter((2, 3)), iter((1,)), {1, 2}]),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    inverse = inputs[0]()
    for inp in inputs:
        # With fit_transform
        mlb = MultiLabelBinarizer()
        got = mlb.fit_transform(inp())
        assert_array_equal(indicator_mat, got)
        assert_array_equal([1, 2, 3], mlb.classes_)
        assert mlb.inverse_transform(got) == inverse
        # With fit
        mlb = MultiLabelBinarizer()
        got = mlb.fit(inp()).transform(inp())
        assert_array_equal(indicator_mat, got)
        assert_array_equal([1, 2, 3], mlb.classes_)
        assert mlb.inverse_transform(got) == inverse
def test_multilabel_binarizer_empty_sample():
    """A sample with no labels becomes an all-zero indicator row."""
    labels = [[1, 2], [1], []]
    expected = np.array([[1, 1], [1, 0], [0, 0]])
    assert_array_equal(MultiLabelBinarizer().fit_transform(labels), expected)
def test_multilabel_binarizer_unknown_class():
    """Transforming labels unseen at fit warns and ignores the unknown class."""
    mlb = MultiLabelBinarizer()
    y = [[1, 2]]
    Y = np.array([[1, 0], [0, 1]])
    warning_message = "unknown class.* will be ignored"
    with pytest.warns(UserWarning, match=warning_message):
        matrix = mlb.fit(y).transform([[4, 1], [2, 0]])
    # Bug fix: this result was previously computed but never checked — the
    # unknown labels 4 and 0 must be dropped from the indicator matrix.
    assert_array_equal(matrix, Y)
    # With explicit classes=[1, 2, 3] the output keeps a column for the
    # never-observed class 3 while still ignoring 4 and 0.
    Y = np.array([[1, 0, 0], [0, 1, 0]])
    mlb = MultiLabelBinarizer(classes=[1, 2, 3])
    with pytest.warns(UserWarning, match=warning_message):
        matrix = mlb.fit(y).transform([[4, 1], [2, 0]])
    assert_array_equal(matrix, Y)
def test_multilabel_binarizer_given_classes():
    """Explicit `classes` fixes the column order of the indicator matrix."""
    inp = [(2, 3), (1,), (1, 2)]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]])
    # fit_transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit_transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])
    # fit().transform()
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    assert_array_equal(mlb.classes_, [1, 3, 2])
    # ensure works with extra class: class 4 yields a leading all-zero column
    mlb = MultiLabelBinarizer(classes=[4, 1, 3, 2])
    assert_array_equal(
        mlb.fit_transform(inp), np.hstack(([[0], [0], [0]], indicator_mat))
    )
    assert_array_equal(mlb.classes_, [4, 1, 3, 2])
    # ensure fit is no-op as iterable is not consumed
    inp = iter(inp)
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
    # ensure a ValueError is thrown if given duplicate classes
    err_msg = (
        "The classes argument contains duplicate classes. Remove "
        "these duplicates before passing them to MultiLabelBinarizer."
    )
    mlb = MultiLabelBinarizer(classes=[1, 3, 2, 3])
    with pytest.raises(ValueError, match=err_msg):
        mlb.fit(inp)
def test_multilabel_binarizer_multiple_calls():
    """Refitting after mutating the `classes` parameter honours the new order."""
    samples = [(2, 3), (1,), (1, 2)]
    mlb = MultiLabelBinarizer(classes=[1, 3, 2])
    # First fit uses the [1, 3, 2] column order.
    assert_array_equal(
        mlb.fit_transform(samples), np.array([[0, 1, 1], [1, 0, 0], [1, 0, 1]])
    )
    # Changing the parameter and refitting switches the columns to [1, 2, 3].
    mlb.classes = [1, 2, 3]
    assert_array_equal(
        mlb.fit_transform(samples), np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    )
def test_multilabel_binarizer_same_length_sequence():
    """Equal-length label sequences are not mistaken for a 2-d array."""
    sequences = [[1], [0], [2]]
    indicator = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])
    # fit_transform path
    binarizer = MultiLabelBinarizer()
    assert_array_equal(binarizer.fit_transform(sequences), indicator)
    assert_array_equal(binarizer.inverse_transform(indicator), sequences)
    # separate fit then transform path
    binarizer = MultiLabelBinarizer()
    assert_array_equal(binarizer.fit(sequences).transform(sequences), indicator)
    assert_array_equal(binarizer.inverse_transform(indicator), sequences)
def test_multilabel_binarizer_non_integer_labels():
    """String and tuple labels are binarized just like integer labels."""
    tuple_classes = _to_object_array([(1,), (2,), (3,)])
    inputs = [
        ([("2", "3"), ("1",), ("1", "2")], ["1", "2", "3"]),
        ([("b", "c"), ("a",), ("a", "b")], ["a", "b", "c"]),
        ([((2,), (3,)), ((1,),), ((1,), (2,))], tuple_classes),
    ]
    indicator_mat = np.array([[0, 1, 1], [1, 0, 0], [1, 1, 0]])
    for inp, classes in inputs:
        # fit_transform()
        mlb = MultiLabelBinarizer()
        inp = np.array(inp, dtype=object)
        assert_array_equal(mlb.fit_transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        indicator_mat_inv = np.array(mlb.inverse_transform(indicator_mat), dtype=object)
        assert_array_equal(indicator_mat_inv, inp)
        # fit().transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        indicator_mat_inv = np.array(mlb.inverse_transform(indicator_mat), dtype=object)
        assert_array_equal(indicator_mat_inv, inp)
    # dict labels are invalid and raise TypeError
    mlb = MultiLabelBinarizer()
    with pytest.raises(TypeError):
        mlb.fit_transform([({}), ({}, {"a": "b"})])
def test_multilabel_binarizer_non_unique():
    """Duplicate labels within one sample are only counted once."""
    repeated = [(1, 1, 1, 0)]
    result = MultiLabelBinarizer().fit_transform(repeated)
    assert_array_equal(result, np.array([[1, 1]]))
def test_multilabel_binarizer_inverse_validation():
    """inverse_transform validates that input is binary with the right width."""
    inp = [(1, 1, 1, 0)]
    mlb = MultiLabelBinarizer()
    mlb.fit_transform(inp)
    # Not binary
    with pytest.raises(ValueError):
        mlb.inverse_transform(np.array([[1, 3]]))
    # The following binary cases are fine, however
    mlb.inverse_transform(np.array([[0, 0]]))
    mlb.inverse_transform(np.array([[1, 1]]))
    mlb.inverse_transform(np.array([[1, 0]]))
    # Wrong shape: the fitted width is 2 columns
    with pytest.raises(ValueError):
        mlb.inverse_transform(np.array([[1]]))
    with pytest.raises(ValueError):
        mlb.inverse_transform(np.array([[1, 1, 1]]))
def test_label_binarize_with_class_order():
    """Output column order follows the given `classes` order exactly."""
    # Sorted class order.
    assert_array_equal(
        label_binarize([1, 6], classes=[1, 2, 4, 6]),
        np.array([[1, 0, 0, 0], [0, 0, 0, 1]]),
    )
    # A permuted class order moves the hot columns accordingly.
    assert_array_equal(
        label_binarize([1, 6], classes=[1, 6, 4, 2]),
        np.array([[1, 0, 0, 0], [0, 1, 0, 0]]),
    )
    assert_array_equal(
        label_binarize([0, 1, 2, 3], classes=[3, 2, 0, 1]),
        np.array([[0, 0, 1, 0], [0, 0, 0, 1], [0, 1, 0, 0], [1, 0, 0, 0]]),
    )
def check_binarized_results(y, classes, pos_label, neg_label, expected):
    """Helper asserting label_binarize and LabelBinarizer both yield `expected`.

    Exercises the dense and sparse output paths and checks that inverting the
    binarized matrix recovers `y`. Parameter combinations that sparse output
    does not support (pos_label == 0 or neg_label != 0) must raise ValueError.
    """
    for sparse_output in [True, False]:
        if (pos_label == 0 or neg_label != 0) and sparse_output:
            with pytest.raises(ValueError):
                label_binarize(
                    y,
                    classes=classes,
                    neg_label=neg_label,
                    pos_label=pos_label,
                    sparse_output=sparse_output,
                )
            continue
        # check label_binarize
        binarized = label_binarize(
            y,
            classes=classes,
            neg_label=neg_label,
            pos_label=pos_label,
            sparse_output=sparse_output,
        )
        assert_array_equal(toarray(binarized), expected)
        assert issparse(binarized) == sparse_output
        # check inverse
        y_type = type_of_target(y)
        if y_type == "multiclass":
            inversed = _inverse_binarize_multiclass(binarized, classes=classes)
        else:
            # threshold is halfway between the two output label values
            inversed = _inverse_binarize_thresholding(
                binarized,
                output_type=y_type,
                classes=classes,
                threshold=((neg_label + pos_label) / 2.0),
            )
        assert_array_equal(toarray(inversed), toarray(y))
        # Check label binarizer
        lb = LabelBinarizer(
            neg_label=neg_label, pos_label=pos_label, sparse_output=sparse_output
        )
        binarized = lb.fit_transform(y)
        assert_array_equal(toarray(binarized), expected)
        assert issparse(binarized) == sparse_output
        inverse_output = lb.inverse_transform(binarized)
        assert_array_equal(toarray(inverse_output), toarray(y))
        assert issparse(inverse_output) == issparse(y)
def test_label_binarize_binary():
    """Binary targets binarize to a single column holding pos/neg label values."""
    y = [0, 1, 0]
    classes = [0, 1]
    pos_label = 2
    neg_label = -1
    # Only the second column of the 2-class indicator is kept for binary y.
    expected = np.array([[2, -1], [-1, 2], [2, -1]])[:, 1].reshape((-1, 1))
    check_binarized_results(y, classes, pos_label, neg_label, expected)
    # Binary case where sparse_output = True will not result in a ValueError
    y = [0, 1, 0]
    classes = [0, 1]
    pos_label = 3
    neg_label = 0
    expected = np.array([[3, 0], [0, 3], [3, 0]])[:, 1].reshape((-1, 1))
    check_binarized_results(y, classes, pos_label, neg_label, expected)
def test_label_binarize_multiclass():
    """Multiclass binarization scales the one-hot indicator by pos_label."""
    y = [0, 1, 2]
    classes = [0, 1, 2]
    # pos_label=2 with neg_label=0 yields 2 * identity.
    check_binarized_results(y, classes, 2, 0, 2 * np.eye(3))
    # Sparse output is incompatible with a nonzero neg_label.
    with pytest.raises(ValueError):
        label_binarize(
            y, classes=classes, neg_label=-1, pos_label=2, sparse_output=True
        )
@pytest.mark.parametrize(
    "arr_type",
    [np.array]
    + COO_CONTAINERS
    + CSC_CONTAINERS
    + CSR_CONTAINERS
    + DOK_CONTAINERS
    + LIL_CONTAINERS,
)
def test_label_binarize_multilabel(arr_type):
    """Multilabel indicators binarize elementwise for dense and sparse inputs."""
    y_ind = np.array([[0, 1, 0], [1, 1, 1], [0, 0, 0]])
    classes = [0, 1, 2]
    pos_label = 2
    neg_label = 0
    # The indicator matrix is simply rescaled by pos_label.
    expected = pos_label * y_ind
    y = arr_type(y_ind)
    check_binarized_results(y, classes, pos_label, neg_label, expected)
    # Sparse output requires neg_label == 0.
    with pytest.raises(ValueError):
        label_binarize(
            y, classes=classes, neg_label=-1, pos_label=pos_label, sparse_output=True
        )
def test_invalid_input_label_binarize():
    """label_binarize rejects inconsistent label/class/parameter combinations."""
    # neg_label must be strictly less than pos_label.
    with pytest.raises(ValueError):
        label_binarize([0, 2], classes=[0, 2], pos_label=0, neg_label=1)
    # Continuous targets are unsupported.
    with pytest.raises(ValueError, match="continuous target data is not "):
        label_binarize([1.2, 2.7], classes=[0, 1])
    # Indicator input whose width mismatches the number of classes.
    with pytest.raises(ValueError, match="mismatch with the labels"):
        label_binarize([[1, 3]], classes=[1, 2, 3])
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_inverse_binarize_multiclass(csr_container):
    """Multiclass inversion copes with negative scores and all-zero rows."""
    scores = csr_container([[0, 1, 0], [-1, 0, -1], [0, 0, 0]])
    recovered = _inverse_binarize_multiclass(scores, np.arange(3))
    assert_array_equal(recovered, np.array([1, 1, 0]))
def test_nan_label_encoder():
    """Check that label encoder encodes nans in transform.

    Non-regression test for #22628.
    """
    encoder = LabelEncoder().fit(["a", "a", "b", np.nan])
    # NaN gets its own class; with classes ["a", "b", nan] its code is 2.
    encoded = encoder.transform([np.nan])
    assert_array_equal(encoded, [2])
@pytest.mark.parametrize(
    "encoder", [LabelEncoder(), LabelBinarizer(), MultiLabelBinarizer()]
)
def test_label_encoders_do_not_have_set_output(encoder):
    """Check that label encoders do not define set_output and work with y as a kwarg.
    Non-regression test for #26854.
    """
    assert not hasattr(encoder, "set_output")
    # fit_transform must accept the target positionally AND as the `y` keyword.
    y_encoded_with_kwarg = encoder.fit_transform(y=["a", "b", "c"])
    y_encoded_positional = encoder.fit_transform(["a", "b", "c"])
    assert_array_equal(y_encoded_with_kwarg, y_encoded_positional)
@pytest.mark.parametrize(
    "array_namespace, device, dtype",
    yield_namespace_device_dtype_combinations(),
    ids=_get_namespace_device_dtype_ids,
)
@pytest.mark.parametrize(
    "y",
    [
        np.array([2, 1, 3, 1, 3]),
        np.array([1, 1, 4, 5, -1, 0]),
        np.array([3, 5, 9, 5, 9, 3]),
    ],
)
def test_label_encoder_array_api_compliance(y, array_namespace, device, dtype):
    """LabelEncoder keeps outputs in the input's array namespace and matches numpy."""
    xp = _array_api_for_tests(array_namespace, device)
    xp_y = xp.asarray(y, device=device)
    with config_context(array_api_dispatch=True):
        xp_label = LabelEncoder()
        np_label = LabelEncoder()
        xp_label = xp_label.fit(xp_y)
        xp_transformed = xp_label.transform(xp_y)
        xp_inv_transformed = xp_label.inverse_transform(xp_transformed)
        np_label = np_label.fit(y)
        np_transformed = np_label.transform(y)
        # Results must stay in the same array namespace as the input...
        assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
        assert get_namespace(xp_inv_transformed)[0].__name__ == xp.__name__
        assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__
        # ...and agree numerically with the plain numpy code path.
        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
        assert_array_equal(_convert_to_numpy(xp_inv_transformed, xp), y)
        assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_)
        # Same checks for the fit_transform shortcut.
        xp_label = LabelEncoder()
        np_label = LabelEncoder()
        xp_transformed = xp_label.fit_transform(xp_y)
        np_transformed = np_label.fit_transform(y)
        assert get_namespace(xp_transformed)[0].__name__ == xp.__name__
        assert get_namespace(xp_label.classes_)[0].__name__ == xp.__name__
        assert_array_equal(_convert_to_numpy(xp_transformed, xp), np_transformed)
        assert_array_equal(_convert_to_numpy(xp_label.classes_, xp), np_label.classes_)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,714 @@
import re
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import (
KFold,
ShuffleSplit,
StratifiedKFold,
cross_val_score,
train_test_split,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import (
KBinsDiscretizer,
LabelBinarizer,
LabelEncoder,
TargetEncoder,
)
def _encode_target(X_ordinal, y_numeric, n_categories, smooth):
"""Simple Python implementation of target encoding."""
cur_encodings = np.zeros(n_categories, dtype=np.float64)
y_mean = np.mean(y_numeric)
if smooth == "auto":
y_variance = np.var(y_numeric)
for c in range(n_categories):
y_subset = y_numeric[X_ordinal == c]
n_i = y_subset.shape[0]
if n_i == 0:
cur_encodings[c] = y_mean
continue
y_subset_variance = np.var(y_subset)
m = y_subset_variance / y_variance
lambda_ = n_i / (n_i + m)
cur_encodings[c] = lambda_ * np.mean(y_subset) + (1 - lambda_) * y_mean
return cur_encodings
else: # float
for c in range(n_categories):
y_subset = y_numeric[X_ordinal == c]
current_sum = np.sum(y_subset) + y_mean * smooth
current_cnt = y_subset.shape[0] + smooth
cur_encodings[c] = current_sum / current_cnt
return cur_encodings
@pytest.mark.parametrize(
    "categories, unknown_value",
    [
        ([np.array([0, 1, 2], dtype=np.int64)], 4),
        ([np.array([1.0, 3.0, np.nan], dtype=np.float64)], 6.0),
        ([np.array(["cat", "dog", "snake"], dtype=object)], "bear"),
        ("auto", 3),
    ],
)
@pytest.mark.parametrize("smooth", [5.0, "auto"])
@pytest.mark.parametrize("target_type", ["binary", "continuous"])
def test_encoding(categories, unknown_value, global_random_seed, smooth, target_type):
    """Check encoding for binary and continuous targets.
    Compare the values returned by `TargetEncoder.fit_transform` against the
    expected encodings for cv splits from a naive reference Python
    implementation in _encode_target.
    """
    n_categories = 3
    X_train_int_array = np.array([[0] * 20 + [1] * 30 + [2] * 40], dtype=np.int64).T
    X_test_int_array = np.array([[0, 1, 2]], dtype=np.int64).T
    n_samples = X_train_int_array.shape[0]
    if categories == "auto":
        X_train = X_train_int_array
        X_test = X_test_int_array
    else:
        # Map the ordinal codes onto the custom category values.
        X_train = categories[0][X_train_int_array]
        X_test = categories[0][X_test_int_array]
    # Append a value that never occurs during training.
    X_test = np.concatenate((X_test, [[unknown_value]]))
    data_rng = np.random.RandomState(global_random_seed)
    n_splits = 3
    if target_type == "binary":
        y_numeric = data_rng.randint(low=0, high=2, size=n_samples)
        target_names = np.array(["cat", "dog"], dtype=object)
        y_train = target_names[y_numeric]
    else:
        assert target_type == "continuous"
        y_numeric = data_rng.uniform(low=-10, high=20, size=n_samples)
        y_train = y_numeric
    # Shuffle so category blocks are not contiguous within CV folds.
    shuffled_idx = data_rng.permutation(n_samples)
    X_train_int_array = X_train_int_array[shuffled_idx]
    X_train = X_train[shuffled_idx]
    y_train = y_train[shuffled_idx]
    y_numeric = y_numeric[shuffled_idx]
    # Define our CV splitting strategy
    if target_type == "binary":
        cv = StratifiedKFold(
            n_splits=n_splits, random_state=global_random_seed, shuffle=True
        )
    else:
        cv = KFold(n_splits=n_splits, random_state=global_random_seed, shuffle=True)
    # Compute the expected values using our reference Python implementation of
    # target encoding:
    expected_X_fit_transform = np.empty_like(X_train_int_array, dtype=np.float64)
    for train_idx, test_idx in cv.split(X_train_int_array, y_train):
        X_, y_ = X_train_int_array[train_idx, 0], y_numeric[train_idx]
        cur_encodings = _encode_target(X_, y_, n_categories, smooth)
        expected_X_fit_transform[test_idx, 0] = cur_encodings[
            X_train_int_array[test_idx, 0]
        ]
    # Check that we can obtain the same encodings by calling `fit_transform` on
    # the estimator with the same CV parameters:
    target_encoder = TargetEncoder(
        smooth=smooth,
        categories=categories,
        cv=n_splits,
        random_state=global_random_seed,
    )
    X_fit_transform = target_encoder.fit_transform(X_train, y_train)
    assert target_encoder.target_type_ == target_type
    assert_allclose(X_fit_transform, expected_X_fit_transform)
    assert len(target_encoder.encodings_) == 1
    if target_type == "binary":
        assert_array_equal(target_encoder.classes_, target_names)
    else:
        assert target_encoder.classes_ is None
    # compute encodings for all data to validate `transform`
    y_mean = np.mean(y_numeric)
    expected_encodings = _encode_target(
        X_train_int_array[:, 0], y_numeric, n_categories, smooth
    )
    assert_allclose(target_encoder.encodings_[0], expected_encodings)
    assert target_encoder.target_mean_ == pytest.approx(y_mean)
    # Transform on test data, the last value is unknown so it is encoded as the target
    # mean
    expected_X_test_transform = np.concatenate(
        (expected_encodings, np.array([y_mean]))
    ).reshape(-1, 1)
    X_test_transform = target_encoder.transform(X_test)
    assert_allclose(X_test_transform, expected_X_test_transform)
@pytest.mark.parametrize(
    "categories, unknown_values",
    [
        ([np.array([0, 1, 2], dtype=np.int64)], "auto"),
        ([np.array(["cat", "dog", "snake"], dtype=object)], ["bear", "rabbit"]),
    ],
)
@pytest.mark.parametrize(
    "target_labels", [np.array([1, 2, 3]), np.array(["a", "b", "c"])]
)
@pytest.mark.parametrize("smooth", [5.0, "auto"])
def test_encoding_multiclass(
    global_random_seed, categories, unknown_values, target_labels, smooth
):
    """Check encoding for multiclass targets.

    The transformed output has one column per (feature, class) pair, ordered
    feature-major: [f0/c0, f0/c1, f0/c2, f1/c0, f1/c1, f1/c2].
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples = 80
    n_features = 2
    feat_1_int = np.array(rng.randint(low=0, high=2, size=n_samples))
    feat_2_int = np.array(rng.randint(low=0, high=3, size=n_samples))
    feat_1 = categories[0][feat_1_int]
    feat_2 = categories[0][feat_2_int]
    X_train = np.column_stack((feat_1, feat_2))
    X_train_int = np.column_stack((feat_1_int, feat_2_int))
    categories_ = [[0, 1], [0, 1, 2]]
    n_classes = 3
    y_train_int = np.array(rng.randint(low=0, high=n_classes, size=n_samples))
    y_train = target_labels[y_train_int]
    # One-vs-rest indicator of the target, one column per class.
    y_train_enc = LabelBinarizer().fit_transform(y_train)
    n_splits = 3
    cv = StratifiedKFold(
        n_splits=n_splits, random_state=global_random_seed, shuffle=True
    )
    # Manually compute encodings for cv splits to validate `fit_transform`
    expected_X_fit_transform = np.empty(
        (X_train_int.shape[0], X_train_int.shape[1] * n_classes),
        dtype=np.float64,
    )
    for f_idx, cats in enumerate(categories_):
        for c_idx in range(n_classes):
            for train_idx, test_idx in cv.split(X_train, y_train):
                y_class = y_train_enc[:, c_idx]
                X_, y_ = X_train_int[train_idx, f_idx], y_class[train_idx]
                current_encoding = _encode_target(X_, y_, len(cats), smooth)
                # f_idx: 0, 0, 0, 1, 1, 1
                # c_idx: 0, 1, 2, 0, 1, 2
                # exp_idx: 0, 1, 2, 3, 4, 5
                exp_idx = c_idx + (f_idx * n_classes)
                expected_X_fit_transform[test_idx, exp_idx] = current_encoding[
                    X_train_int[test_idx, f_idx]
                ]
    target_encoder = TargetEncoder(
        smooth=smooth,
        cv=n_splits,
        random_state=global_random_seed,
    )
    X_fit_transform = target_encoder.fit_transform(X_train, y_train)
    assert target_encoder.target_type_ == "multiclass"
    assert_allclose(X_fit_transform, expected_X_fit_transform)
    # Manually compute encoding to validate `transform`
    expected_encodings = []
    for f_idx, cats in enumerate(categories_):
        for c_idx in range(n_classes):
            y_class = y_train_enc[:, c_idx]
            current_encoding = _encode_target(
                X_train_int[:, f_idx], y_class, len(cats), smooth
            )
            expected_encodings.append(current_encoding)
    assert len(target_encoder.encodings_) == n_features * n_classes
    for i in range(n_features * n_classes):
        assert_allclose(target_encoder.encodings_[i], expected_encodings[i])
    assert_array_equal(target_encoder.classes_, target_labels)
    # Include unknown values at the end
    X_test_int = np.array([[0, 1], [1, 2], [4, 5]])
    if unknown_values == "auto":
        X_test = X_test_int
    else:
        X_test = np.empty_like(X_test_int[:-1, :], dtype=object)
        for column_idx in range(X_test_int.shape[1]):
            X_test[:, column_idx] = categories[0][X_test_int[:-1, column_idx]]
        # Add unknown values at end
        X_test = np.vstack((X_test, unknown_values))
    y_mean = np.mean(y_train_enc, axis=0)
    expected_X_test_transform = np.empty(
        (X_test_int.shape[0], X_test_int.shape[1] * n_classes),
        dtype=np.float64,
    )
    n_rows = X_test_int.shape[0]
    f_idx = [0, 0, 0, 1, 1, 1]
    # Last row are unknowns, dealt with later
    for row_idx in range(n_rows - 1):
        for i, enc in enumerate(expected_encodings):
            expected_X_test_transform[row_idx, i] = enc[X_test_int[row_idx, f_idx[i]]]
    # Unknowns encoded as target mean for each class
    # `y_mean` contains target mean for each class, thus cycle through mean of
    # each class, `n_features` times
    mean_idx = [0, 1, 2, 0, 1, 2]
    for i in range(n_classes * n_features):
        expected_X_test_transform[n_rows - 1, i] = y_mean[mean_idx[i]]
    X_test_transform = target_encoder.transform(X_test)
    assert_allclose(X_test_transform, expected_X_test_transform)
@pytest.mark.parametrize(
    "X, categories",
    [
        (
            np.array([[0] * 10 + [1] * 10 + [3]], dtype=np.int64).T,  # 3 is unknown
            [[0, 1, 2]],
        ),
        (
            np.array(
                [["cat"] * 10 + ["dog"] * 10 + ["snake"]], dtype=object
            ).T,  # snake is unknown
            [["dog", "cat", "cow"]],
        ),
    ],
)
@pytest.mark.parametrize("smooth", [4.0, "auto"])
def test_custom_categories(X, categories, smooth):
    """Custom categories with unknown categories that are not in training data."""
    rng = np.random.RandomState(0)
    y = rng.uniform(low=-10, high=20, size=X.shape[0])
    enc = TargetEncoder(categories=categories, smooth=smooth, random_state=0).fit(X, y)
    # The last element is unknown and encoded as the mean
    y_mean = y.mean()
    X_trans = enc.transform(X[-1:])
    assert X_trans[0, 0] == pytest.approx(y_mean)
    assert len(enc.encodings_) == 1
    # custom category that is not in training data
    # (its encoding falls back to the global target mean)
    assert enc.encodings_[0][-1] == pytest.approx(y_mean)
@pytest.mark.parametrize(
    "y, msg",
    [
        ([1, 2, 0, 1], "Found input variables with inconsistent"),
        (
            np.array([[1, 2, 0], [1, 2, 3]]).T,
            "Target type was inferred to be 'multiclass-multioutput'",
        ),
    ],
)
def test_errors(y, msg):
    """Invalid targets (wrong length or multioutput) raise ValueError."""
    X_single_feature = np.array([[1, 0, 1]]).T
    encoder = TargetEncoder()
    with pytest.raises(ValueError, match=msg):
        encoder.fit_transform(X_single_feature, y)
def test_use_regression_target():
    """Check inferred and specified `target_type` on regression target."""
    X = np.array([[0, 1, 0, 1, 0, 1]]).T
    y = np.array([1.0, 2.0, 3.0, 2.0, 3.0, 4.0])
    # With few distinct float values, type inference treats y as multiclass,
    # so the stratified CV warns about tiny classes.
    enc = TargetEncoder(cv=2)
    with pytest.warns(
        UserWarning,
        match=re.escape(
            "The least populated class in y has only 1 members, which is less than"
            " n_splits=2."
        ),
    ):
        enc.fit_transform(X, y)
    assert enc.target_type_ == "multiclass"
    # Explicitly passing target_type avoids the misinference.
    enc = TargetEncoder(cv=2, target_type="continuous")
    enc.fit_transform(X, y)
    assert enc.target_type_ == "continuous"
@pytest.mark.parametrize(
    "y, feature_names",
    [
        ([1, 2] * 10, ["A", "B"]),
        ([1, 2, 3] * 6 + [1, 2], ["A_1", "A_2", "A_3", "B_1", "B_2", "B_3"]),
        (
            ["y1", "y2", "y3"] * 6 + ["y1", "y2"],
            ["A_y1", "A_y2", "A_y3", "B_y1", "B_y2", "B_y3"],
        ),
    ],
)
def test_feature_names_out_set_output(y, feature_names):
    """Check TargetEncoder works with set_output."""
    pd = pytest.importorskip("pandas")
    X_df = pd.DataFrame({"A": ["a", "b"] * 10, "B": [1, 2] * 10})
    enc_default = TargetEncoder(cv=2, smooth=3.0, random_state=0)
    enc_default.set_output(transform="default")
    enc_pandas = TargetEncoder(cv=2, smooth=3.0, random_state=0)
    enc_pandas.set_output(transform="pandas")
    X_default = enc_default.fit_transform(X_df, y)
    X_pandas = enc_pandas.fit_transform(X_df, y)
    # The pandas output carries the same values and exposes the expected
    # feature names (suffixed per class for multiclass targets) as columns.
    assert_allclose(X_pandas.to_numpy(), X_default)
    assert_array_equal(enc_pandas.get_feature_names_out(), feature_names)
    assert_array_equal(enc_pandas.get_feature_names_out(), X_pandas.columns)
@pytest.mark.parametrize("to_pandas", [True, False])
@pytest.mark.parametrize("smooth", [1.0, "auto"])
@pytest.mark.parametrize("target_type", ["binary-ints", "binary-str", "continuous"])
def test_multiple_features_quick(to_pandas, smooth, target_type):
    """Check target encoder with multiple features.

    Expected encodings are recomputed manually with `_encode_target` — per CV
    fold for `fit_transform`, on the full data for `transform` — and compared
    against the estimator's output for binary and continuous targets, with
    ndarray and (optionally) pandas inputs.
    """
    X_ordinal = np.array(
        [[1, 1], [0, 1], [1, 1], [2, 1], [1, 0], [0, 1], [1, 0], [0, 0]], dtype=np.int64
    )
    # Mirror TargetEncoder's internal CV choice: stratified folds for
    # classification targets, plain KFold for a continuous target.
    if target_type == "binary-str":
        y_train = np.array(["a", "b", "a", "a", "b", "b", "a", "b"])
        y_integer = LabelEncoder().fit_transform(y_train)
        cv = StratifiedKFold(2, random_state=0, shuffle=True)
    elif target_type == "binary-ints":
        y_train = np.array([3, 4, 3, 3, 3, 4, 4, 4])
        y_integer = LabelEncoder().fit_transform(y_train)
        cv = StratifiedKFold(2, random_state=0, shuffle=True)
    else:
        y_train = np.array([3.0, 5.1, 2.4, 3.5, 4.1, 5.5, 10.3, 7.3], dtype=np.float32)
        y_integer = y_train
        cv = KFold(2, random_state=0, shuffle=True)
    y_mean = np.mean(y_integer)
    categories = [[0, 1, 2], [0, 1]]
    X_test = np.array(
        [
            [0, 1],
            [3, 0],  # 3 is unknown
            [1, 10],  # 10 is unknown
        ],
        dtype=np.int64,
    )
    if to_pandas:
        pd = pytest.importorskip("pandas")
        # convert second feature to an object (string) column
        X_train = pd.DataFrame(
            {
                "feat0": X_ordinal[:, 0],
                "feat1": np.array(["cat", "dog"], dtype=object)[X_ordinal[:, 1]],
            }
        )
        # "snake" is unknown
        X_test = pd.DataFrame({"feat0": X_test[:, 0], "feat1": ["dog", "cat", "snake"]})
    else:
        X_train = X_ordinal
    # Manually compute the expected encoding for fit_transform: each fold's
    # rows are encoded with statistics learned on the complementary fold.
    expected_X_fit_transform = np.empty_like(X_ordinal, dtype=np.float64)
    for f_idx, cats in enumerate(categories):
        for train_idx, test_idx in cv.split(X_ordinal, y_integer):
            X_, y_ = X_ordinal[train_idx, f_idx], y_integer[train_idx]
            current_encoding = _encode_target(X_, y_, len(cats), smooth)
            expected_X_fit_transform[test_idx, f_idx] = current_encoding[
                X_ordinal[test_idx, f_idx]
            ]
    # Manually compute the expected encoding for transform, which uses
    # statistics learned on the full training set (no CV).
    expected_encodings = []
    for f_idx, cats in enumerate(categories):
        current_encoding = _encode_target(
            X_ordinal[:, f_idx], y_integer, len(cats), smooth
        )
        expected_encodings.append(current_encoding)
    # Unknown categories at transform time fall back to the global target mean.
    expected_X_test_transform = np.array(
        [
            [expected_encodings[0][0], expected_encodings[1][1]],
            [y_mean, expected_encodings[1][0]],
            [expected_encodings[0][1], y_mean],
        ],
        dtype=np.float64,
    )
    enc = TargetEncoder(smooth=smooth, cv=2, random_state=0)
    X_fit_transform = enc.fit_transform(X_train, y_train)
    assert_allclose(X_fit_transform, expected_X_fit_transform)
    assert len(enc.encodings_) == 2
    for i in range(2):
        assert_allclose(enc.encodings_[i], expected_encodings[i])
    X_test_transform = enc.transform(X_test)
    assert_allclose(X_test_transform, expected_X_test_transform)
@pytest.mark.parametrize(
    "y, y_mean",
    [
        (np.array([3.4] * 20), 3.4),
        (np.array([0] * 20), 0),
        (np.array(["a"] * 20, dtype=object), 0),
    ],
    ids=["continuous", "binary", "binary-string"],
)
@pytest.mark.parametrize("smooth", ["auto", 4.0, 0.0])
def test_constant_target_and_feature(y, y_mean, smooth):
    """Check edge case where both the feature and the target are constant."""
    n_samples = 20
    X = np.ones((n_samples, 1), dtype=np.int64)

    encoder = TargetEncoder(cv=2, smooth=smooth, random_state=0)
    X_encoded = encoder.fit_transform(X, y)

    # A constant feature carries no information: every sample is encoded with
    # the (constant) target mean.
    assert_allclose(X_encoded, np.full((n_samples, 1), y_mean, dtype=np.float64))
    assert encoder.encodings_[0][0] == pytest.approx(y_mean)
    assert encoder.target_mean_ == pytest.approx(y_mean)

    # Unknown categories at transform time also map to the target mean.
    X_test_encoded = encoder.transform(np.array([[1], [0]]))
    assert_allclose(X_test_encoded, np.full((2, 1), y_mean, dtype=np.float64))
def test_fit_transform_not_associated_with_y_if_ordinal_categorical_is_not(
    global_random_seed,
):
    """A non-informative categorical must stay non-informative after encoding.

    Even when the rows are sorted by the target (a worst case for leakage),
    the shuffled inner CV of `fit_transform` must prevent information about
    `y_train` from leaking into the encoded feature.
    """
    cardinality = 30  # not too large, otherwise we need a very large n_samples
    n_samples = 3000
    rng = np.random.RandomState(global_random_seed)
    y_train = rng.normal(size=n_samples)
    X_train = rng.randint(0, cardinality, size=n_samples).reshape(-1, 1)

    # Sort the rows by y_train to attempt to cause a leak.
    sorter = y_train.argsort()
    y_train = y_train[sorter]
    X_train = X_train[sorter]

    encoder_shuffled = TargetEncoder(shuffle=True, random_state=global_random_seed)
    X_encoded_shuffled = encoder_shuffled.fit_transform(X_train, y_train)
    encoder_unshuffled = TargetEncoder(shuffle=False)
    X_encoded_unshuffled = encoder_unshuffled.fit_transform(X_train, y_train)

    # Check that no information about y_train has leaked into X_train:
    regressor = RandomForestRegressor(
        n_estimators=10, min_samples_leaf=20, random_state=global_random_seed
    )
    cv = ShuffleSplit(n_splits=50, random_state=global_random_seed)

    # It's impossible to learn a good predictive model on the training set
    # when using the original representation X_train or the target encoded
    # representation with shuffled inner CV. For the latter, no information
    # about y_train has inadvertently leaked into the per-fold priors.
    assert cross_val_score(regressor, X_train, y_train, cv=cv).mean() < 0.1
    shuffled_score = cross_val_score(
        regressor, X_encoded_shuffled, y_train, cv=cv
    ).mean()
    assert shuffled_score < 0.1

    # Without inner CV shuffling, a lot of information about y_train goes into
    # the per-fold y_train.mean() priors: shrinkage is no longer effective and
    # cannot prevent downstream over-fitting.
    unshuffled_score = cross_val_score(
        regressor, X_encoded_unshuffled, y_train, cv=cv
    ).mean()
    assert unshuffled_score > 0.5
def test_smooth_zero():
    """Check edge case with zero smoothing and a category missing from a fold."""
    X = np.array([0] * 5 + [1] * 5, dtype=np.int64).reshape(-1, 1)
    y = np.array([2.1, 4.3, 1.2, 3.1, 1.0, 9.0, 10.3, 14.2, 13.3, 15.0])

    encoder = TargetEncoder(smooth=0.0, shuffle=False, cv=2)
    X_encoded = encoder.fit_transform(X, y)

    # With an unshuffled 2-fold CV, category 0 does not exist in the second
    # half, so the first half's rows are encoded as the second half's mean.
    assert_allclose(X_encoded[0], np.mean(y[5:]))
    # Symmetrically, category 1 does not exist in the first half, so it is
    # encoded as the mean of the first half.
    assert_allclose(X_encoded[-1], np.mean(y[:5]))
@pytest.mark.parametrize("smooth", [0.0, 1e3, "auto"])
def test_invariance_of_encoding_under_label_permutation(smooth, global_random_seed):
    # The encoding must not depend on which integers label the categories,
    # only on the grouping they induce. This is a fairly trivial property but
    # it is helpful to understand the following test.
    rng = np.random.RandomState(global_random_seed)

    # A random target and an informative categorical feature keep the test
    # non-trivial when smoothing is active.
    y = rng.normal(size=1000)
    n_categories = 30
    X = KBinsDiscretizer(
        n_bins=n_categories, quantile_method="averaged_inverted_cdf", encode="ordinal"
    ).fit_transform(y.reshape(-1, 1))
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=global_random_seed
    )

    # Relabel the categories through a random permutation of the integers.
    permuted_labels = rng.permutation(n_categories)
    X_train_permuted = permuted_labels[X_train.astype(np.int32)]
    X_test_permuted = permuted_labels[X_test.astype(np.int32)]

    target_encoder = TargetEncoder(smooth=smooth, random_state=global_random_seed)
    X_train_encoded = target_encoder.fit_transform(X_train, y_train)
    X_test_encoded = target_encoder.transform(X_test)

    X_train_permuted_encoded = target_encoder.fit_transform(X_train_permuted, y_train)
    X_test_permuted_encoded = target_encoder.transform(X_test_permuted)

    # Encodings before and after relabeling must coincide exactly.
    assert_allclose(X_train_encoded, X_train_permuted_encoded)
    assert_allclose(X_test_encoded, X_test_permuted_encoded)
@pytest.mark.parametrize("smooth", [0.0, "auto"])
def test_target_encoding_for_linear_regression(smooth, global_random_seed):
    """Check statistical properties of target-encoded features in a Ridge fit.

    Three categorical features are encoded: one informative for the target,
    one shuffled copy (independent of the target), and one with near-unique
    cardinality. With the internal cross-fitting of ``fit_transform``, the
    downstream linear model should rely only on the informative feature;
    with a plain fit-then-transform on the same data, the high-cardinality
    feature should cause catastrophic overfitting instead.
    """
    # Check some expected statistical properties when fitting a linear
    # regression model on target encoded features depending on their relation
    # with that target.
    # In this test, we use the Ridge class with the "lsqr" solver and a little
    # bit of regularization to implement a linear regression model that
    # converges quickly for large `n_samples` and robustly in case of
    # correlated features. Since we will fit this model on a mean centered
    # target, we do not need to fit an intercept and this will help simplify
    # the analysis with respect to the expected coefficients.
    linear_regression = Ridge(alpha=1e-6, solver="lsqr", fit_intercept=False)
    # Construct a random target variable. We need a large number of samples for
    # this test to be stable across all values of the random seed.
    n_samples = 50_000
    rng = np.random.RandomState(global_random_seed)
    y = rng.randn(n_samples)
    # Generate a single informative ordinal feature with medium cardinality.
    # Inject some irreducible noise to make it harder for a multivariate model
    # to identify the informative feature from other pure noise features.
    noise = 0.8 * rng.randn(n_samples)
    n_categories = 100
    X_informative = KBinsDiscretizer(
        n_bins=n_categories,
        encode="ordinal",
        strategy="uniform",
        random_state=rng,
    ).fit_transform((y + noise).reshape(-1, 1))
    # Let's permute the labels to hide the fact that this feature is
    # informative to naive linear regression model trained on the raw ordinal
    # values. As highlighted in the previous test, the target encoding should be
    # invariant to such a permutation.
    permutated_labels = rng.permutation(n_categories)
    X_informative = permutated_labels[X_informative.astype(np.int32)]
    # Generate a shuffled copy of the informative feature to destroy the
    # relationship with the target.
    X_shuffled = rng.permutation(X_informative)
    # Also include a very high cardinality categorical feature that is by
    # itself independent of the target variable: target encoding such a feature
    # without internal cross-validation should cause catastrophic overfitting
    # for the downstream regressor, even with shrinkage. This kind of features
    # typically represents near unique identifiers of samples. In general they
    # should be removed from a machine learning datasets but here we want to
    # study the ability of the default behavior of TargetEncoder to mitigate
    # them automatically.
    X_near_unique_categories = rng.choice(
        int(0.9 * n_samples), size=n_samples, replace=True
    ).reshape(-1, 1)
    # Assemble the dataset and do a train-test split:
    X = np.concatenate(
        [X_informative, X_shuffled, X_near_unique_categories],
        axis=1,
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    # Let's first check that a linear regression model trained on the raw
    # features underfits because of the meaning-less ordinal encoding of the
    # labels.
    raw_model = linear_regression.fit(X_train, y_train)
    assert raw_model.score(X_train, y_train) < 0.1
    assert raw_model.score(X_test, y_test) < 0.1
    # Now do the same with target encoding using the internal CV mechanism
    # implemented when using fit_transform.
    model_with_cv = make_pipeline(
        TargetEncoder(smooth=smooth, random_state=rng), linear_regression
    ).fit(X_train, y_train)
    # This model should be able to fit the data well and also generalise to the
    # test data (assuming that the binning is fine-grained enough). The R2
    # scores are not perfect because of the noise injected during the
    # generation of the unique informative feature.
    coef = model_with_cv[-1].coef_
    assert model_with_cv.score(X_train, y_train) > 0.5, coef
    assert model_with_cv.score(X_test, y_test) > 0.5, coef
    # The target encoder recovers the linear relationship with slope 1 between
    # the target encoded unique informative predictor and the target. Since the
    # target encoding of the 2 other features is not informative thanks to the
    # use of internal cross-validation, the multivariate linear regressor
    # assigns a coef of 1 to the first feature and 0 to the other 2.
    assert coef[0] == pytest.approx(1, abs=1e-2)
    assert (np.abs(coef[1:]) < 0.2).all()
    # Let's now disable the internal cross-validation by calling fit and then
    # transform separately on the training set:
    target_encoder = TargetEncoder(smooth=smooth, random_state=rng).fit(
        X_train, y_train
    )
    X_enc_no_cv_train = target_encoder.transform(X_train)
    X_enc_no_cv_test = target_encoder.transform(X_test)
    model_no_cv = linear_regression.fit(X_enc_no_cv_train, y_train)
    # The linear regression model should always overfit because it assigns
    # too much weight to the extremely high cardinality feature relatively to
    # the informative feature. Note that this is the case even when using
    # the empirical Bayes smoothing which is not enough to prevent such
    # overfitting alone.
    coef = model_no_cv.coef_
    assert model_no_cv.score(X_enc_no_cv_train, y_train) > 0.7, coef
    assert model_no_cv.score(X_enc_no_cv_test, y_test) < 0.5, coef
    # The model overfits because it assigns too much weight to the high
    # cardinality yet non-informative feature instead of the lower
    # cardinality yet informative feature:
    assert abs(coef[0]) < abs(coef[2])
def test_pandas_copy_on_write():
    """
    Test target-encoder cython code when y is read-only.

    The numpy array underlying df["y"] is read-only when copy-on-write is enabled.
    Non-regression test for gh-27879.
    """
    pd = pytest.importorskip("pandas", minversion="2.0")
    with pd.option_context("mode.copy_on_write", True):
        frame = pd.DataFrame({"x": ["a", "b", "b"], "y": [4.0, 5.0, 6.0]})
        features = frame[["x"]]
        target = frame["y"]
        # Must not raise even though the target's buffer is read-only.
        TargetEncoder(target_type="continuous").fit(features, target)