add read me

2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/init.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/init.py
@@ -0,0 +1,13 @@
+"""Semi-supervised learning algorithms.
+
+These algorithms utilize small amounts of labeled data and large amounts of unlabeled
+data for classification tasks.
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+from ._label_propagation import LabelPropagation, LabelSpreading
+from ._self_training import SelfTrainingClassifier
+
+__all__ = ["LabelPropagation", "LabelSpreading", "SelfTrainingClassifier"]
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/init.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/init.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/_label_propagation.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/_label_propagation.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/_self_training.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/pycache/_self_training.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/_label_propagation.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/_label_propagation.py
@@ -0,0 +1,630 @@
+# coding=utf8
+"""
+Label propagation in the context of this module refers to a set of
+semi-supervised classification algorithms. At a high level, these algorithms
+work by forming a fully-connected graph between all points given and solving
+for the steady-state distribution of labels at each point.
+
+These algorithms perform very well in practice. They can be very expensive
+to run, at approximately O(N^3) where N is the number of (labeled and
+unlabeled) points. The theory (why they perform so well) is motivated by
+intuitions from random walk algorithms and geometric relationships in the data.
+For more information, see the references below.
+
+Model Features
+--------------
+Label clamping:
+  The algorithm tries to learn distributions of labels over the dataset given
+  label assignments over an initial subset. In one variant, the algorithm does
+  not allow for any errors in the initial assignment (hard-clamping) while
+  in another variant, the algorithm allows for some wiggle room for the initial
+  assignments, allowing them to change by a fraction alpha in each iteration
+  (soft-clamping).
+
+Kernel:
+  A function which projects a vector into some higher dimensional space. This
+  implementation supports RBF and KNN kernels. Using the RBF kernel generates
+  a dense matrix of size O(N^2). KNN kernel will generate a sparse matrix of
+  size O(k*N) which will run much faster. See the documentation for SVMs for
+  more info on kernels.
+
+Examples
+--------
+>>> import numpy as np
+>>> from sklearn import datasets
+>>> from sklearn.semi_supervised import LabelPropagation
+>>> label_prop_model = LabelPropagation()
+>>> iris = datasets.load_iris()
+>>> rng = np.random.RandomState(42)
+>>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
+>>> labels = np.copy(iris.target)
+>>> labels[random_unlabeled_points] = -1
+>>> label_prop_model.fit(iris.data, labels)
+LabelPropagation(...)
+
+Notes
+-----
+References:
+[1] Yoshua Bengio, Olivier Delalleau, Nicolas Le Roux. In Semi-Supervised
+Learning (2006), pp. 193-216
+
+[2] Olivier Delalleau, Yoshua Bengio, Nicolas Le Roux. Efficient
+Non-Parametric Function Induction in Semi-Supervised Learning. AISTATS 2005
+"""
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+import warnings
+from abc import ABCMeta, abstractmethod
+from numbers import Integral, Real
+
+import numpy as np
+from scipy import sparse
+
+from ..base import BaseEstimator, ClassifierMixin, _fit_context
+from ..exceptions import ConvergenceWarning
+from ..metrics.pairwise import rbf_kernel
+from ..neighbors import NearestNeighbors
+from ..utils._param_validation import Interval, StrOptions
+from ..utils.extmath import safe_sparse_dot
+from ..utils.fixes import laplacian as csgraph_laplacian
+from ..utils.multiclass import check_classification_targets
+from ..utils.validation import check_is_fitted, validate_data
+
+
+class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta):
+    """Base class for label propagation module.
+
+    Parameters
+    ----------
+    kernel : {'knn', 'rbf'} or callable, default='rbf'
+        String identifier for kernel function to use or the kernel function
+        itself. Only 'rbf' and 'knn' strings are valid inputs. The function
+        passed should take two inputs, each of shape (n_samples, n_features),
+        and return a (n_samples, n_samples) shaped weight matrix.
+
+    gamma : float, default=20
+        Parameter for rbf kernel.
+
+    n_neighbors : int, default=7
+        Parameter for knn kernel. Needs to be strictly positive.
+
+    alpha : float, default=1.0
+        Clamping factor.
+
+    max_iter : int, default=30
+        Maximum number of iterations allowed.
+
+    tol : float, default=1e-3
+        Convergence tolerance: threshold to consider the system at steady
+        state.
+
+    n_jobs : int, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+    """
+
+    _parameter_constraints: dict = {
+        "kernel": [StrOptions({"knn", "rbf"}), callable],
+        "gamma": [Interval(Real, 0, None, closed="left")],
+        "n_neighbors": [Interval(Integral, 0, None, closed="neither")],
+        "alpha": [None, Interval(Real, 0, 1, closed="neither")],
+        "max_iter": [Interval(Integral, 0, None, closed="neither")],
+        "tol": [Interval(Real, 0, None, closed="left")],
+        "n_jobs": [None, Integral],
+    }
+
+    def __init__(
+        self,
+        kernel="rbf",
+        *,
+        gamma=20,
+        n_neighbors=7,
+        alpha=1,
+        max_iter=30,
+        tol=1e-3,
+        n_jobs=None,
+    ):
+        self.max_iter = max_iter
+        self.tol = tol
+
+        # kernel parameters
+        self.kernel = kernel
+        self.gamma = gamma
+        self.n_neighbors = n_neighbors
+
+        # clamping factor
+        self.alpha = alpha
+
+        self.n_jobs = n_jobs
+
+    def _get_kernel(self, X, y=None):
+        if self.kernel == "rbf":
+            if y is None:
+                return rbf_kernel(X, X, gamma=self.gamma)
+            else:
+                return rbf_kernel(X, y, gamma=self.gamma)
+        elif self.kernel == "knn":
+            if self.nn_fit is None:
+                self.nn_fit = NearestNeighbors(
+                    n_neighbors=self.n_neighbors, n_jobs=self.n_jobs
+                ).fit(X)
+            if y is None:
+                return self.nn_fit.kneighbors_graph(
+                    self.nn_fit._fit_X, self.n_neighbors, mode="connectivity"
+                )
+            else:
+                return self.nn_fit.kneighbors(y, return_distance=False)
+        elif callable(self.kernel):
+            if y is None:
+                return self.kernel(X, X)
+            else:
+                return self.kernel(X, y)
+
+    @abstractmethod
+    def _build_graph(self):
+        raise NotImplementedError(
+            "Graph construction must be implemented to fit a label propagation model."
+        )
+
+    def predict(self, X):
+        """Perform inductive inference across the model.
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The data matrix.
+
+        Returns
+        -------
+        y : ndarray of shape (n_samples,)
+            Predictions for input data.
+        """
+        # Note: since `predict` does not accept semi-supervised labels as input,
+        # `fit(X, y).predict(X) != fit(X, y).transduction_`.
+        # Hence, `fit_predict` is not implemented.
+        # See https://github.com/scikit-learn/scikit-learn/pull/24898
+        probas = self.predict_proba(X)
+        return self.classes_[np.argmax(probas, axis=1)].ravel()
+
+    def predict_proba(self, X):
+        """Predict probability for each possible outcome.
+
+        Compute the probability estimates for each single sample in X
+        and each possible outcome seen during training (categorical
+        distribution).
+
+        Parameters
+        ----------
+        X : array-like of shape (n_samples, n_features)
+            The data matrix.
+
+        Returns
+        -------
+        probabilities : ndarray of shape (n_samples, n_classes)
+            Normalized probability distributions across
+            class labels.
+        """
+        check_is_fitted(self)
+
+        X_2d = validate_data(
+            self,
+            X,
+            accept_sparse=["csc", "csr", "coo", "dok", "bsr", "lil", "dia"],
+            reset=False,
+        )
+        weight_matrices = self._get_kernel(self.X_, X_2d)
+        if self.kernel == "knn":
+            probabilities = np.array(
+                [
+                    np.sum(self.label_distributions_[weight_matrix], axis=0)
+                    for weight_matrix in weight_matrices
+                ]
+            )
+        else:
+            weight_matrices = weight_matrices.T
+            probabilities = safe_sparse_dot(weight_matrices, self.label_distributions_)
+        normalizer = np.atleast_2d(np.sum(probabilities, axis=1)).T
+        probabilities /= normalizer
+        return probabilities
+
+    @_fit_context(prefer_skip_nested_validation=True)
+    def fit(self, X, y):
+        """Fit a semi-supervised label propagation model to X.
+
+        The input samples (labeled and unlabeled) are provided by matrix X,
+        and target labels are provided by matrix y. We conventionally apply the
+        label -1 to unlabeled samples in matrix y in a semi-supervised
+        classification.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        y : array-like of shape (n_samples,)
+            Target class values with unlabeled points marked as -1.
+            All unlabeled samples will be transductively assigned labels
+            internally, which are stored in `transduction_`.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+        X, y = validate_data(
+            self,
+            X,
+            y,
+            accept_sparse=["csr", "csc"],
+            reset=True,
+        )
+        self.X_ = X
+        check_classification_targets(y)
+
+        # actual graph construction (implementations should override this)
+        graph_matrix = self._build_graph()
+
+        # label construction
+        # construct a categorical distribution for classification only
+        classes = np.unique(y)
+        classes = classes[classes != -1]
+        self.classes_ = classes
+
+        n_samples, n_classes = len(y), len(classes)
+
+        y = np.asarray(y)
+        unlabeled = y == -1
+
+        # initialize distributions: one-hot rows for labeled samples, zeros otherwise
+        self.label_distributions_ = np.zeros((n_samples, n_classes))
+        for label in classes:
+            self.label_distributions_[y == label, classes == label] = 1
+
+        y_static = np.copy(self.label_distributions_)
+        if self._variant == "propagation":
+            # LabelPropagation
+            y_static[unlabeled] = 0
+        else:
+            # LabelSpreading: scale the initial labels by (1 - alpha)
+            y_static *= 1 - self.alpha
+
+        l_previous = np.zeros((self.X_.shape[0], n_classes))
+
+        unlabeled = unlabeled[:, np.newaxis]
+        if sparse.issparse(graph_matrix):
+            graph_matrix = graph_matrix.tocsr()
+
+        for self.n_iter_ in range(self.max_iter):
+            if np.abs(self.label_distributions_ - l_previous).sum() < self.tol:
+                break
+
+            l_previous = self.label_distributions_
+            self.label_distributions_ = safe_sparse_dot(
+                graph_matrix, self.label_distributions_
+            )
+
+            if self._variant == "propagation":
+                normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]
+                normalizer[normalizer == 0] = 1
+                self.label_distributions_ /= normalizer
+                self.label_distributions_ = np.where(
+                    unlabeled, self.label_distributions_, y_static
+                )
+            else:
+                # soft clamp: alpha * propagated labels + (1 - alpha) * initial labels
+                self.label_distributions_ = (
+                    np.multiply(self.alpha, self.label_distributions_) + y_static
+                )
+        else:
+            warnings.warn(
+                "max_iter=%d was reached without convergence." % self.max_iter,
+                category=ConvergenceWarning,
+            )
+            self.n_iter_ += 1
+
+        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]
+        normalizer[normalizer == 0] = 1
+        self.label_distributions_ /= normalizer
+
+        # transduction_: the most probable class for each training sample
+        transduction = self.classes_[np.argmax(self.label_distributions_, axis=1)]
+        self.transduction_ = transduction.ravel()
+        return self
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
+
+class LabelPropagation(BaseLabelPropagation):
+    """Label Propagation classifier.
+
+    Read more in the :ref:`User Guide <label_propagation>`.
+
+    Parameters
+    ----------
+    kernel : {'knn', 'rbf'} or callable, default='rbf'
+        String identifier for kernel function to use or the kernel function
+        itself. Only 'rbf' and 'knn' strings are valid inputs. The function
+        passed should take two inputs, each of shape (n_samples, n_features),
+        and return a (n_samples, n_samples) shaped weight matrix.
+
+    gamma : float, default=20
+        Parameter for rbf kernel.
+
+    n_neighbors : int, default=7
+        Parameter for knn kernel, which needs to be strictly positive.
+
+    max_iter : int, default=1000
+        Maximum number of iterations allowed.
+
+    tol : float, default=1e-3
+        Convergence tolerance: threshold to consider the system at steady
+        state.
+
+    n_jobs : int, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Attributes
+    ----------
+    X_ : {array-like, sparse matrix} of shape (n_samples, n_features)
+        Input array.
+
+    classes_ : ndarray of shape (n_classes,)
+        The distinct labels used in classifying instances.
+
+    label_distributions_ : ndarray of shape (n_samples, n_classes)
+        Categorical distribution for each item.
+
+    transduction_ : ndarray of shape (n_samples,)
+        Label assigned to each item during :term:`fit`.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    n_iter_ : int
+        Number of iterations run.
+
+    See Also
+    --------
+    LabelSpreading : Alternate label propagation strategy more robust to noise.
+
+    References
+    ----------
+    Xiaojin Zhu and Zoubin Ghahramani. Learning from labeled and unlabeled data
+    with label propagation. Technical Report CMU-CALD-02-107, Carnegie Mellon
+    University, 2002 http://pages.cs.wisc.edu/~jerryzhu/pub/CMU-CALD-02-107.pdf
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn import datasets
+    >>> from sklearn.semi_supervised import LabelPropagation
+    >>> label_prop_model = LabelPropagation()
+    >>> iris = datasets.load_iris()
+    >>> rng = np.random.RandomState(42)
+    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
+    >>> labels = np.copy(iris.target)
+    >>> labels[random_unlabeled_points] = -1
+    >>> label_prop_model.fit(iris.data, labels)
+    LabelPropagation(...)
+    """
+
+    _variant = "propagation"
+
+    _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints}
+    _parameter_constraints.pop("alpha")
+
+    def __init__(
+        self,
+        kernel="rbf",
+        *,
+        gamma=20,
+        n_neighbors=7,
+        max_iter=1000,
+        tol=1e-3,
+        n_jobs=None,
+    ):
+        super().__init__(
+            kernel=kernel,
+            gamma=gamma,
+            n_neighbors=n_neighbors,
+            max_iter=max_iter,
+            tol=tol,
+            n_jobs=n_jobs,
+            alpha=None,
+        )
+
+    def _build_graph(self):
+        """Matrix representing a fully connected graph between each sample.
+
+        This basic implementation creates a non-stochastic affinity matrix, so
+        class distributions will exceed 1 (normalization may be desired).
+        """
+        if self.kernel == "knn":
+            self.nn_fit = None
+        affinity_matrix = self._get_kernel(self.X_)
+        normalizer = affinity_matrix.sum(axis=0)
+        if sparse.issparse(affinity_matrix):
+            affinity_matrix.data /= np.diag(np.array(normalizer))
+        else:
+            affinity_matrix /= normalizer[:, np.newaxis]
+        return affinity_matrix
+
+    def fit(self, X, y):
+        """Fit a semi-supervised label propagation model to X.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Training data, where `n_samples` is the number of samples
+            and `n_features` is the number of features.
+
+        y : array-like of shape (n_samples,)
+            Target class values with unlabeled points marked as -1.
+            All unlabeled samples will be transductively assigned labels
+            internally, which are stored in `transduction_`.
+
+        Returns
+        -------
+        self : object
+            Returns the instance itself.
+        """
+        return super().fit(X, y)
+
+
+class LabelSpreading(BaseLabelPropagation):
+    """LabelSpreading model for semi-supervised learning.
+
+    This model is similar to the basic Label Propagation algorithm,
+    but uses affinity matrix based on the normalized graph Laplacian
+    and soft clamping across the labels.
+
+    Read more in the :ref:`User Guide <label_propagation>`.
+
+    Parameters
+    ----------
+    kernel : {'knn', 'rbf'} or callable, default='rbf'
+        String identifier for kernel function to use or the kernel function
+        itself. Only 'rbf' and 'knn' strings are valid inputs. The function
+        passed should take two inputs, each of shape (n_samples, n_features),
+        and return a (n_samples, n_samples) shaped weight matrix.
+
+    gamma : float, default=20
+        Parameter for rbf kernel.
+
+    n_neighbors : int, default=7
+        Parameter for knn kernel which is a strictly positive integer.
+
+    alpha : float, default=0.2
+        Clamping factor. A value in (0, 1) that specifies the relative amount
+        that an instance should adopt the information from its neighbors as
+        opposed to its initial label.
+        alpha=0 means keeping the initial label information; alpha=1 means
+        replacing all initial information.
+
+    max_iter : int, default=30
+        Maximum number of iterations allowed.
+
+    tol : float, default=1e-3
+        Convergence tolerance: threshold to consider the system at steady
+        state.
+
+    n_jobs : int, default=None
+        The number of parallel jobs to run.
+        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
+        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+
+    Attributes
+    ----------
+    X_ : ndarray of shape (n_samples, n_features)
+        Input array.
+
+    classes_ : ndarray of shape (n_classes,)
+        The distinct labels used in classifying instances.
+
+    label_distributions_ : ndarray of shape (n_samples, n_classes)
+        Categorical distribution for each item.
+
+    transduction_ : ndarray of shape (n_samples,)
+        Label assigned to each item during :term:`fit`.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    n_iter_ : int
+        Number of iterations run.
+
+    See Also
+    --------
+    LabelPropagation : Unregularized graph based semi-supervised learning.
+
+    References
+    ----------
+    `Dengyong Zhou, Olivier Bousquet, Thomas Navin Lal, Jason Weston,
+    Bernhard Schoelkopf. Learning with local and global consistency (2004)
+    <https://citeseerx.ist.psu.edu/doc_view/pid/d74c37aabf2d5cae663007cbd8718175466aea8c>`_
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn import datasets
+    >>> from sklearn.semi_supervised import LabelSpreading
+    >>> label_prop_model = LabelSpreading()
+    >>> iris = datasets.load_iris()
+    >>> rng = np.random.RandomState(42)
+    >>> random_unlabeled_points = rng.rand(len(iris.target)) < 0.3
+    >>> labels = np.copy(iris.target)
+    >>> labels[random_unlabeled_points] = -1
+    >>> label_prop_model.fit(iris.data, labels)
+    LabelSpreading(...)
+    """
+
+    _variant = "spreading"
+
+    _parameter_constraints: dict = {**BaseLabelPropagation._parameter_constraints}
+    _parameter_constraints["alpha"] = [Interval(Real, 0, 1, closed="neither")]
+
+    def __init__(
+        self,
+        kernel="rbf",
+        *,
+        gamma=20,
+        n_neighbors=7,
+        alpha=0.2,
+        max_iter=30,
+        tol=1e-3,
+        n_jobs=None,
+    ):
+        # this one has different base parameters
+        super().__init__(
+            kernel=kernel,
+            gamma=gamma,
+            n_neighbors=n_neighbors,
+            alpha=alpha,
+            max_iter=max_iter,
+            tol=tol,
+            n_jobs=n_jobs,
+        )
+
+    def _build_graph(self):
+        """Build the Label Spreading graph matrix from the normalized Laplacian."""
+        # compute affinity matrix (or gram matrix)
+        if self.kernel == "knn":
+            self.nn_fit = None
+        n_samples = self.X_.shape[0]
+        affinity_matrix = self._get_kernel(self.X_)
+        laplacian = csgraph_laplacian(affinity_matrix, normed=True)
+        laplacian = -laplacian
+        if sparse.issparse(laplacian):
+            diag_mask = laplacian.row == laplacian.col
+            laplacian.data[diag_mask] = 0.0
+        else:
+            laplacian.flat[:: n_samples + 1] = 0.0  # set diag to 0.0
+        return laplacian
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/_self_training.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/_self_training.py
@@ -0,0 +1,625 @@
+import warnings
+from numbers import Integral, Real
+from warnings import warn
+
+import numpy as np
+
+from ..base import (
+    BaseEstimator,
+    ClassifierMixin,
+    MetaEstimatorMixin,
+    _fit_context,
+    clone,
+)
+from ..utils import Bunch, get_tags, safe_mask
+from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
+from ..utils.metadata_routing import (
+    MetadataRouter,
+    MethodMapping,
+    _raise_for_params,
+    _routing_enabled,
+    process_routing,
+)
+from ..utils.metaestimators import available_if
+from ..utils.validation import _estimator_has, check_is_fitted, validate_data
+
+__all__ = ["SelfTrainingClassifier"]
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+
+class SelfTrainingClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
+    """Self-training classifier.
+
+    This :term:`metaestimator` allows a given supervised classifier to function as a
+    semi-supervised classifier, allowing it to learn from unlabeled data. It
+    does this by iteratively predicting pseudo-labels for the unlabeled data
+    and adding them to the training set.
+
+    The classifier will continue iterating until either max_iter is reached, or
+    no pseudo-labels were added to the training set in the previous iteration.
+
+    Read more in the :ref:`User Guide <self_training>`.
+
+    Parameters
+    ----------
+    estimator : estimator object
+        An estimator object implementing `fit` and `predict_proba`.
+        Invoking the `fit` method will fit a clone of the passed estimator,
+        which will be stored in the `estimator_` attribute.
+
+        .. versionadded:: 1.6
+            `estimator` was added to replace `base_estimator`.
+
+    base_estimator : estimator object
+        An estimator object implementing `fit` and `predict_proba`.
+        Invoking the `fit` method will fit a clone of the passed estimator,
+        which will be stored in the `estimator_` attribute.
+
+        .. deprecated:: 1.6
+            `base_estimator` was deprecated in 1.6 and will be removed in 1.8.
+            Use `estimator` instead.
+
+    threshold : float, default=0.75
+        The decision threshold for use with `criterion='threshold'`.
+        Should be in [0, 1). When using the `'threshold'` criterion, a
+        :ref:`well calibrated classifier <calibration>` should be used.
+
+    criterion : {'threshold', 'k_best'}, default='threshold'
+        The selection criterion used to select which labels to add to the
+        training set. If `'threshold'`, pseudo-labels with prediction
+        probabilities above `threshold` are added to the dataset. If `'k_best'`,
+        the `k_best` pseudo-labels with highest prediction probabilities are
+        added to the dataset. When using the 'threshold' criterion, a
+        :ref:`well calibrated classifier <calibration>` should be used.
+
+    k_best : int, default=10
+        The amount of samples to add in each iteration. Only used when
+        `criterion='k_best'`.
+
+    max_iter : int or None, default=10
+        Maximum number of iterations allowed. Should be greater than or equal
+        to 0. If it is `None`, the classifier will continue to predict labels
+        until no new pseudo-labels are added, or all unlabeled samples have
+        been labeled.
+
+    verbose : bool, default=False
+        Enable verbose output.
+
+    Attributes
+    ----------
+    estimator_ : estimator object
+        The fitted estimator.
+
+    classes_ : ndarray or list of ndarray of shape (n_classes,)
+        Class labels for each output. (Taken from the trained
+        `estimator_`).
+
+    transduction_ : ndarray of shape (n_samples,)
+        The labels used for the final fit of the classifier, including
+        pseudo-labels added during fit.
+
+    labeled_iter_ : ndarray of shape (n_samples,)
+        The iteration in which each sample was labeled. When a sample has
+        iteration 0, the sample was already labeled in the original dataset.
+        When a sample has iteration -1, the sample was not labeled in any
+        iteration.
+
+    n_features_in_ : int
+        Number of features seen during :term:`fit`.
+
+        .. versionadded:: 0.24
+
+    feature_names_in_ : ndarray of shape (`n_features_in_`,)
+        Names of features seen during :term:`fit`. Defined only when `X`
+        has feature names that are all strings.
+
+        .. versionadded:: 1.0
+
+    n_iter_ : int
+        The number of rounds of self-training, that is the number of times the
+        base estimator is fitted on relabeled variants of the training set.
+
+    termination_condition_ : {'max_iter', 'no_change', 'all_labeled'}
+        The reason that fitting was stopped.
+
+        - `'max_iter'`: `n_iter_` reached `max_iter`.
+        - `'no_change'`: no new labels were predicted.
+        - `'all_labeled'`: all unlabeled samples were labeled before `max_iter`
+          was reached.
+
+    See Also
+    --------
+    LabelPropagation : Label propagation classifier.
+    LabelSpreading : Label spreading model for semi-supervised learning.
+
+    References
+    ----------
+    :doi:`David Yarowsky. 1995. Unsupervised word sense disambiguation rivaling
+    supervised methods. In Proceedings of the 33rd annual meeting on
+    Association for Computational Linguistics (ACL '95). Association for
+    Computational Linguistics, Stroudsburg, PA, USA, 189-196.
+    <10.3115/981658.981684>`
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> from sklearn import datasets
+    >>> from sklearn.semi_supervised import SelfTrainingClassifier
+    >>> from sklearn.svm import SVC
+    >>> rng = np.random.RandomState(42)
+    >>> iris = datasets.load_iris()
+    >>> random_unlabeled_points = rng.rand(iris.target.shape[0]) < 0.3
+    >>> iris.target[random_unlabeled_points] = -1
+    >>> svc = SVC(probability=True, gamma="auto")
+    >>> self_training_model = SelfTrainingClassifier(svc)
+    >>> self_training_model.fit(iris.data, iris.target)
+    SelfTrainingClassifier(...)
+    """
+
+    _parameter_constraints: dict = {
+        # We don't require `predict_proba` here to allow passing a meta-estimator
+        # that only exposes `predict_proba` after fitting.
+        # TODO(1.8) remove None option
+        "estimator": [None, HasMethods(["fit"])],
+        # TODO(1.8) remove
+        "base_estimator": [
+            HasMethods(["fit"]),
+            Hidden(StrOptions({"deprecated"})),
+        ],
+        "threshold": [Interval(Real, 0.0, 1.0, closed="left")],
+        "criterion": [StrOptions({"threshold", "k_best"})],
+        "k_best": [Interval(Integral, 1, None, closed="left")],
+        "max_iter": [Interval(Integral, 0, None, closed="left"), None],
+        "verbose": ["verbose"],
+    }
+
+    def __init__(
+        self,
+        estimator=None,
+        base_estimator="deprecated",
+        threshold=0.75,
+        criterion="threshold",
+        k_best=10,
+        max_iter=10,
+        verbose=False,
+    ):
+        self.estimator = estimator
+        self.threshold = threshold
+        self.criterion = criterion
+        self.k_best = k_best
+        self.max_iter = max_iter
+        self.verbose = verbose
+
+        # TODO(1.8) remove
+        self.base_estimator = base_estimator
+
+    def _get_estimator(self):
+        """Get the estimator.
+
+        Returns
+        -------
+        estimator_ : estimator object
+            The cloned estimator object.
+        """
+        # TODO(1.8): remove and only keep clone(self.estimator)
+        if self.estimator is None and self.base_estimator != "deprecated":
+            estimator_ = clone(self.base_estimator)
+
+            warn(
+                (
+                    "`base_estimator` has been deprecated in 1.6 and will be removed"
+                    " in 1.8. Please use `estimator` instead."
+                ),
+                FutureWarning,
+            )
+        # TODO(1.8) remove
+        elif self.estimator is None and self.base_estimator == "deprecated":
+            raise ValueError(
+                "You must pass an estimator to SelfTrainingClassifier. Use `estimator`."
+            )
+        elif self.estimator is not None and self.base_estimator != "deprecated":
+            raise ValueError(
+                "You must pass only one estimator to SelfTrainingClassifier."
+                " Use `estimator`."
+            )
+        else:
+            estimator_ = clone(self.estimator)
+        return estimator_
+
+    @_fit_context(
+        # SelfTrainingClassifier.estimator is not validated yet
+        prefer_skip_nested_validation=False
+    )
+    def fit(self, X, y, **params):
+        """
+        Fit self-training classifier using `X`, `y` as training data.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        y : {array-like, sparse matrix} of shape (n_samples,)
+            Array representing the labels. Unlabeled samples should have the
+            label -1.
+
+        **params : dict
+            Parameters to pass to the underlying estimators.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        self : object
+            Fitted estimator.
+        """
+        _raise_for_params(params, self, "fit")
+
+        # Work on a fresh clone so the estimator passed by the user is not fitted.
+        self.estimator_ = self._get_estimator()
+
+        # we need row slicing support for sparse matrices, but costly finiteness check
+        # can be delegated to the base estimator.
+        X, y = validate_data(
+            self,
+            X,
+            y,
+            accept_sparse=["csr", "csc", "lil", "dok"],
+            ensure_all_finite=False,
+        )
+
+        # NumPy unicode ("U") and bytes ("S") label arrays are rejected: the
+        # unlabeled marker must be the integer -1, hence the object-dtype advice.
+        if y.dtype.kind in ["U", "S"]:
+            raise ValueError(
+                "y has dtype string. If you wish to predict on "
+                "string targets, use dtype object, and use -1"
+                " as the label for unlabeled samples."
+            )
+
+        # Boolean mask of the samples that currently carry a label; it is updated
+        # in place as pseudo-labels are added below.
+        has_label = y != -1
+
+        if np.all(has_label):
+            warnings.warn("y contains no unlabeled samples", UserWarning)
+
+        # `X.shape[0] - np.sum(has_label)` is the number of unlabeled samples.
+        if self.criterion == "k_best" and (
+            self.k_best > X.shape[0] - np.sum(has_label)
+        ):
+            warnings.warn(
+                (
+                    "k_best is larger than the amount of unlabeled "
+                    "samples. All unlabeled samples will be labeled in "
+                    "the first iteration"
+                ),
+                UserWarning,
+            )
+
+        # With routing disabled, no extra keyword arguments reach `estimator_.fit`.
+        if _routing_enabled():
+            routed_params = process_routing(self, "fit", **params)
+        else:
+            routed_params = Bunch(estimator=Bunch(fit={}))
+
+        # `transduction_` holds the current label of every sample;
+        # `labeled_iter_` holds the iteration at which each sample was labeled
+        # (0 for the initially labeled ones, -1 while still unlabeled).
+        self.transduction_ = np.copy(y)
+        self.labeled_iter_ = np.full_like(y, -1)
+        self.labeled_iter_[has_label] = 0
+
+        self.n_iter_ = 0
+
+        # Iterate until everything is labeled or the iteration budget is spent
+        # (`max_iter=None` means no budget).
+        while not np.all(has_label) and (
+            self.max_iter is None or self.n_iter_ < self.max_iter
+        ):
+            self.n_iter_ += 1
+            self.estimator_.fit(
+                X[safe_mask(X, has_label)],
+                self.transduction_[has_label],
+                **routed_params.estimator.fit,
+            )
+
+            # Predict on the unlabeled samples
+            prob = self.estimator_.predict_proba(X[safe_mask(X, ~has_label)])
+            pred = self.estimator_.classes_[np.argmax(prob, axis=1)]
+            max_proba = np.max(prob, axis=1)
+
+            # Select new labeled samples
+            if self.criterion == "threshold":
+                # Strict comparison: a probability equal to the threshold is
+                # not selected.
+                selected = max_proba > self.threshold
+            else:
+                n_to_select = min(self.k_best, max_proba.shape[0])
+                if n_to_select == max_proba.shape[0]:
+                    # Every remaining sample is selected; no partitioning needed.
+                    selected = np.ones_like(max_proba, dtype=bool)
+                else:
+                    # NB these are indices, not a mask
+                    selected = np.argpartition(-max_proba, n_to_select)[:n_to_select]
+
+            # Map selected indices into original array
+            selected_full = np.nonzero(~has_label)[0][selected]
+
+            # Add newly labeled confident predictions to the dataset
+            self.transduction_[selected_full] = pred[selected]
+            has_label[selected_full] = True
+            self.labeled_iter_[selected_full] = self.n_iter_
+
+            if selected_full.shape[0] == 0:
+                # no changed labels
+                self.termination_condition_ = "no_change"
+                break
+
+            if self.verbose:
+                print(
+                    f"End of iteration {self.n_iter_},"
+                    f" added {selected_full.shape[0]} new labels."
+                )
+
+        # These two checks run after the loop and in this order, so "max_iter"
+        # replaces a "no_change" set on the last allowed iteration, and
+        # "all_labeled" takes precedence over both.
+        if self.n_iter_ == self.max_iter:
+            self.termination_condition_ = "max_iter"
+        if np.all(has_label):
+            self.termination_condition_ = "all_labeled"
+
+        # Final fit on all samples that ended up labeled (original labels plus
+        # the pseudo-labels added above).
+        self.estimator_.fit(
+            X[safe_mask(X, has_label)],
+            self.transduction_[has_label],
+            **routed_params.estimator.fit,
+        )
+        self.classes_ = self.estimator_.classes_
+        return self
+
+    @available_if(_estimator_has("predict"))
+    def predict(self, X, **params):
+        """Predict the classes of `X`.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        **params : dict of str -> object
+            Parameters to pass to the underlying estimator's ``predict`` method.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        y : ndarray of shape (n_samples,)
+            Array with predicted labels.
+        """
+        check_is_fitted(self)
+        _raise_for_params(params, self, "predict")
+
+        if _routing_enabled():
+            # metadata routing is enabled.
+            routed_params = process_routing(self, "predict", **params)
+        else:
+            routed_params = Bunch(estimator=Bunch(predict={}))
+
+        X = validate_data(
+            self,
+            X,
+            accept_sparse=True,
+            ensure_all_finite=False,
+            reset=False,
+        )
+        return self.estimator_.predict(X, **routed_params.estimator.predict)
+
+    @available_if(_estimator_has("predict_proba"))
+    def predict_proba(self, X, **params):
+        """Predict probability for each possible outcome.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        **params : dict of str -> object
+            Parameters to pass to the underlying estimator's
+            ``predict_proba`` method.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        y : ndarray of shape (n_samples, n_classes)
+            Array with prediction probabilities, one column per class of the
+            fitted estimator.
+        """
+        check_is_fitted(self)
+        _raise_for_params(params, self, "predict_proba")
+
+        if _routing_enabled():
+            # metadata routing is enabled.
+            routed_params = process_routing(self, "predict_proba", **params)
+        else:
+            # routing disabled: nothing extra is forwarded to the estimator.
+            routed_params = Bunch(estimator=Bunch(predict_proba={}))
+
+        # The finiteness check is skipped here and left to the wrapped estimator.
+        X = validate_data(
+            self,
+            X,
+            accept_sparse=True,
+            ensure_all_finite=False,
+            reset=False,
+        )
+        return self.estimator_.predict_proba(X, **routed_params.estimator.predict_proba)
+
+    @available_if(_estimator_has("decision_function"))
+    def decision_function(self, X, **params):
+        """Call decision function of the `estimator`.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        **params : dict of str -> object
+            Parameters to pass to the underlying estimator's
+            ``decision_function`` method.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        y : ndarray
+            Result of the decision function of the `estimator`, returned
+            unchanged. Its shape is decided by that estimator (for scikit-learn
+            classifiers presumably `(n_samples,)` or `(n_samples, n_classes)`).
+        """
+        check_is_fitted(self)
+        _raise_for_params(params, self, "decision_function")
+
+        if _routing_enabled():
+            # metadata routing is enabled.
+            routed_params = process_routing(self, "decision_function", **params)
+        else:
+            # routing disabled: nothing extra is forwarded to the estimator.
+            routed_params = Bunch(estimator=Bunch(decision_function={}))
+
+        # The finiteness check is skipped here and left to the wrapped estimator.
+        X = validate_data(
+            self,
+            X,
+            accept_sparse=True,
+            ensure_all_finite=False,
+            reset=False,
+        )
+        return self.estimator_.decision_function(
+            X, **routed_params.estimator.decision_function
+        )
+
+    @available_if(_estimator_has("predict_log_proba"))
+    def predict_log_proba(self, X, **params):
+        """Predict log probability for each possible outcome.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        **params : dict of str -> object
+            Parameters to pass to the underlying estimator's
+            ``predict_log_proba`` method.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        y : ndarray of shape (n_samples, n_classes)
+            Array with log prediction probabilities, one column per class of
+            the fitted estimator.
+        """
+        check_is_fitted(self)
+        _raise_for_params(params, self, "predict_log_proba")
+
+        if _routing_enabled():
+            # metadata routing is enabled.
+            routed_params = process_routing(self, "predict_log_proba", **params)
+        else:
+            # routing disabled: nothing extra is forwarded to the estimator.
+            routed_params = Bunch(estimator=Bunch(predict_log_proba={}))
+
+        # The finiteness check is skipped here and left to the wrapped estimator.
+        X = validate_data(
+            self,
+            X,
+            accept_sparse=True,
+            ensure_all_finite=False,
+            reset=False,
+        )
+        return self.estimator_.predict_log_proba(
+            X, **routed_params.estimator.predict_log_proba
+        )
+
+    @available_if(_estimator_has("score"))
+    def score(self, X, y, **params):
+        """Call score on the `estimator`.
+
+        Parameters
+        ----------
+        X : {array-like, sparse matrix} of shape (n_samples, n_features)
+            Array representing the data.
+
+        y : array-like of shape (n_samples,)
+            Array representing the labels.
+
+        **params : dict of str -> object
+            Parameters to pass to the underlying estimator's ``score`` method.
+
+            .. versionadded:: 1.6
+                Only available if `enable_metadata_routing=True`,
+                which can be set by using
+                ``sklearn.set_config(enable_metadata_routing=True)``.
+                See :ref:`Metadata Routing User Guide <metadata_routing>` for
+                more details.
+
+        Returns
+        -------
+        score : float
+            Result of calling score on the `estimator`.
+        """
+        check_is_fitted(self)
+        _raise_for_params(params, self, "score")
+
+        if _routing_enabled():
+            # metadata routing is enabled.
+            routed_params = process_routing(self, "score", **params)
+        else:
+            routed_params = Bunch(estimator=Bunch(score={}))
+
+        X = validate_data(
+            self,
+            X,
+            accept_sparse=True,
+            ensure_all_finite=False,
+            reset=False,
+        )
+        return self.estimator_.score(X, y, **routed_params.estimator.score)
+
+    def get_metadata_routing(self):
+        """Get metadata routing of this object.
+
+        Please check :ref:`User Guide <metadata_routing>` on how the routing
+        mechanism works.
+
+        .. versionadded:: 1.6
+
+        Returns
+        -------
+        routing : MetadataRouter
+            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
+            routing information.
+        """
+        router = MetadataRouter(owner=self.__class__.__name__)
+        router.add(
+            estimator=self.estimator,
+            method_mapping=(
+                MethodMapping()
+                .add(callee="fit", caller="fit")
+                .add(callee="score", caller="fit")
+                .add(callee="predict", caller="predict")
+                .add(callee="predict_proba", caller="predict_proba")
+                .add(callee="decision_function", caller="decision_function")
+                .add(callee="predict_log_proba", caller="predict_log_proba")
+                .add(callee="score", caller="score")
+            ),
+        )
+        return router
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        # TODO(1.8): remove the condition check together with base_estimator
+        if self.estimator is not None:
+            tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
+        return tags
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/init.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/init.py
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/init.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/init.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/test_label_propagation.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/test_label_propagation.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/test_self_training.cpython-312.pyc
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/pycache/test_self_training.cpython-312.pyc
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/test_label_propagation.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/test_label_propagation.py
@@ -0,0 +1,238 @@
+"""test the label propagation module"""
+
+import warnings
+
+import numpy as np
+import pytest
+from scipy.sparse import issparse
+
+from sklearn.datasets import make_classification
+from sklearn.exceptions import ConvergenceWarning
+from sklearn.metrics.pairwise import rbf_kernel
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import NearestNeighbors
+from sklearn.semi_supervised import _label_propagation as label_propagation
+from sklearn.utils._testing import (
+    _convert_container,
+    assert_allclose,
+    assert_array_equal,
+)
+
+# Container names handed to `_convert_container` to build dense and sparse inputs.
+CONSTRUCTOR_TYPES = ("array", "sparse_csr", "sparse_csc")
+
+# (estimator class, constructor kwargs) pairs used to parametrize the tests:
+# each class is exercised with the "rbf" kernel, the "knn" kernel and a
+# callable kernel wrapping `rbf_kernel`.
+ESTIMATORS = [
+    (label_propagation.LabelPropagation, {"kernel": "rbf"}),
+    (label_propagation.LabelPropagation, {"kernel": "knn", "n_neighbors": 2}),
+    (
+        label_propagation.LabelPropagation,
+        {"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)},
+    ),
+    (label_propagation.LabelSpreading, {"kernel": "rbf"}),
+    (label_propagation.LabelSpreading, {"kernel": "knn", "n_neighbors": 2}),
+    (
+        label_propagation.LabelSpreading,
+        {"kernel": lambda x, y: rbf_kernel(x, y, gamma=20)},
+    ),
+]
+
+
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_fit_transduction(global_dtype, Estimator, parameters):
+    samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype)
+    labels = [0, 1, -1]
+    clf = Estimator(**parameters).fit(samples, labels)
+    assert clf.transduction_[2] == 1
+
+
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_distribution(global_dtype, Estimator, parameters):
+    if parameters["kernel"] == "knn":
+        pytest.skip(
+            "Unstable test for this configuration: changes in k-NN ordering break it."
+        )
+    samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]], dtype=global_dtype)
+    labels = [0, 1, -1]
+    clf = Estimator(**parameters).fit(samples, labels)
+    assert_allclose(clf.label_distributions_[2], [0.5, 0.5], atol=1e-2)
+
+
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_predict(global_dtype, Estimator, parameters):
+    samples = np.asarray([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], dtype=global_dtype)
+    labels = [0, 1, -1]
+    clf = Estimator(**parameters).fit(samples, labels)
+    assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1]))
+
+
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_predict_proba(global_dtype, Estimator, parameters):
+    samples = np.asarray([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], dtype=global_dtype)
+    labels = [0, 1, -1]
+    clf = Estimator(**parameters).fit(samples, labels)
+    assert_allclose(clf.predict_proba([[1.0, 1.0]]), np.array([[0.5, 0.5]]))
+
+
+@pytest.mark.parametrize("alpha", [0.1, 0.3, 0.5, 0.7, 0.9])
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_label_spreading_closed_form(global_dtype, Estimator, parameters, alpha):
+    # Check that the iterative LabelSpreading solution matches the closed form
+    # (I - alpha * S)^-1 Y, row-normalized.
+    # NOTE(review): `Estimator` and `parameters` are never used in the body, so
+    # the parametrization over ESTIMATORS only repeats the same check — confirm
+    # whether it can be dropped.
+    n_classes = 2
+    X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0)
+    X = X.astype(global_dtype, copy=False)
+    # Mark every third sample as unlabeled.
+    y[::3] = -1
+
+    gamma = 0.1
+    clf = label_propagation.LabelSpreading(gamma=gamma).fit(X, y)
+    # adopting notation from Zhou et al (2004):
+    S = clf._build_graph()
+    # One-hot encode the labels with one extra column: the label -1 indexes the
+    # last column, which is dropped afterwards, leaving all-zero rows for
+    # unlabeled samples.
+    Y = np.zeros((len(y), n_classes + 1), dtype=X.dtype)
+    Y[np.arange(len(y)), y] = 1
+    Y = Y[:, :-1]
+
+    expected = np.dot(np.linalg.inv(np.eye(len(S), dtype=S.dtype) - alpha * S), Y)
+    expected /= expected.sum(axis=1)[:, np.newaxis]
+
+    clf = label_propagation.LabelSpreading(
+        max_iter=100, alpha=alpha, tol=1e-10, gamma=gamma
+    )
+    clf.fit(X, y)
+
+    assert_allclose(expected, clf.label_distributions_)
+
+
+def test_label_propagation_closed_form(global_dtype):
+    # Check that the iterative LabelPropagation solution matches the closed
+    # form Y_u = (I - Tuu)^-1 Tul Y_l on the unlabeled samples.
+    n_classes = 2
+    X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0)
+    X = X.astype(global_dtype, copy=False)
+    # Mark every third sample as unlabeled.
+    y[::3] = -1
+    # One-hot encode with one extra column: the label -1 indexes the last
+    # column, so that column flags the unlabeled samples.
+    Y = np.zeros((len(y), n_classes + 1))
+    Y[np.arange(len(y)), y] = 1
+    unlabelled_idx = Y[:, (-1,)].nonzero()[0]
+    labelled_idx = (Y[:, (-1,)] == 0).nonzero()[0]
+
+    clf = label_propagation.LabelPropagation(max_iter=100, tol=1e-10, gamma=0.1)
+    clf.fit(X, y)
+    # adopting notation from Zhu et al 2002
+    T_bar = clf._build_graph()
+    # Sub-blocks of the graph matrix: unlabeled-to-unlabeled and
+    # unlabeled-to-labeled.
+    Tuu = T_bar[tuple(np.meshgrid(unlabelled_idx, unlabelled_idx, indexing="ij"))]
+    Tul = T_bar[tuple(np.meshgrid(unlabelled_idx, labelled_idx, indexing="ij"))]
+    # Drop the helper "unlabeled" column before computing the expectation.
+    Y = Y[:, :-1]
+    Y_l = Y[labelled_idx, :]
+    Y_u = np.dot(np.dot(np.linalg.inv(np.eye(Tuu.shape[0]) - Tuu), Tul), Y_l)
+
+    # Labeled rows keep their one-hot labels; unlabeled rows take the closed
+    # form, and every row is normalized to sum to one.
+    expected = Y.copy()
+    expected[unlabelled_idx, :] = Y_u
+    expected /= expected.sum(axis=1)[:, np.newaxis]
+
+    assert_allclose(expected, clf.label_distributions_, atol=1e-4)
+
+
+@pytest.mark.parametrize("accepted_sparse_type", ["sparse_csr", "sparse_csc"])
+@pytest.mark.parametrize("index_dtype", [np.int32, np.int64])
+@pytest.mark.parametrize("dtype", [np.float32, np.float64])
+@pytest.mark.parametrize("Estimator, parameters", ESTIMATORS)
+def test_sparse_input_types(
+    accepted_sparse_type, index_dtype, dtype, Estimator, parameters
+):
+    # This is non-regression test for #17085
+    X = _convert_container([[1.0, 0.0], [0.0, 2.0], [1.0, 3.0]], accepted_sparse_type)
+    X.data = X.data.astype(dtype, copy=False)
+    X.indices = X.indices.astype(index_dtype, copy=False)
+    X.indptr = X.indptr.astype(index_dtype, copy=False)
+    labels = [0, 1, -1]
+    clf = Estimator(**parameters).fit(X, labels)
+    assert_array_equal(clf.predict([[0.5, 2.5]]), np.array([1]))
+
+
+@pytest.mark.parametrize("constructor_type", CONSTRUCTOR_TYPES)
+def test_convergence_speed(constructor_type):
+    # This is a non-regression test for #5774
+    X = _convert_container([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]], constructor_type)
+    y = np.array([0, 1, -1])
+    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=5000)
+    mdl.fit(X, y)
+
+    # this should converge quickly:
+    assert mdl.n_iter_ < 10
+    assert_array_equal(mdl.predict(X), [0, 1, 1])
+
+
+def test_convergence_warning():
+    # This is a non-regression test for #5774
+    X = np.array([[1.0, 0.0], [0.0, 1.0], [1.0, 2.5]])
+    y = np.array([0, 1, -1])
+    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=1)
+    warn_msg = "max_iter=1 was reached without convergence."
+    with pytest.warns(ConvergenceWarning, match=warn_msg):
+        mdl.fit(X, y)
+    assert mdl.n_iter_ == mdl.max_iter
+
+    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=1)
+    with pytest.warns(ConvergenceWarning, match=warn_msg):
+        mdl.fit(X, y)
+    assert mdl.n_iter_ == mdl.max_iter
+
+    mdl = label_propagation.LabelSpreading(kernel="rbf", max_iter=500)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", ConvergenceWarning)
+        mdl.fit(X, y)
+
+    mdl = label_propagation.LabelPropagation(kernel="rbf", max_iter=500)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", ConvergenceWarning)
+        mdl.fit(X, y)
+
+
+@pytest.mark.parametrize(
+    "LabelPropagationCls",
+    [label_propagation.LabelSpreading, label_propagation.LabelPropagation],
+)
+def test_label_propagation_non_zero_normalizer(LabelPropagationCls):
+    # check that we don't divide by zero in case of null normalizer
+    # non-regression test for
+    # https://github.com/scikit-learn/scikit-learn/pull/15946
+    # https://github.com/scikit-learn/scikit-learn/issues/9292
+    X = np.array([[100.0, 100.0], [100.0, 100.0], [0.0, 0.0], [0.0, 0.0]])
+    y = np.array([0, 1, -1, -1])
+    mdl = LabelPropagationCls(kernel="knn", max_iter=100, n_neighbors=1)
+    with warnings.catch_warnings():
+        warnings.simplefilter("error", RuntimeWarning)
+        mdl.fit(X, y)
+
+
+def test_predict_sparse_callable_kernel(global_dtype):
+    # This is a non-regression test for #15866
+
+    # Custom sparse kernel (top-K RBF)
+    def topk_rbf(X, Y=None, n_neighbors=10, gamma=1e-5):
+        nn = NearestNeighbors(n_neighbors=10, metric="euclidean", n_jobs=2)
+        nn.fit(X)
+        W = -1 * nn.kneighbors_graph(Y, mode="distance").power(2) * gamma
+        np.exp(W.data, out=W.data)
+        assert issparse(W)
+        return W.T
+
+    n_classes = 4
+    n_samples = 500
+    n_test = 10
+    X, y = make_classification(
+        n_classes=n_classes,
+        n_samples=n_samples,
+        n_features=20,
+        n_informative=20,
+        n_redundant=0,
+        n_repeated=0,
+        random_state=0,
+    )
+    X = X.astype(global_dtype)
+
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=n_test, random_state=0
+    )
+
+    model = label_propagation.LabelSpreading(kernel=topk_rbf)
+    model.fit(X_train, y_train)
+    assert model.score(X_test, y_test) >= 0.9
+
+    model = label_propagation.LabelPropagation(kernel=topk_rbf)
+    model.fit(X_train, y_train)
+    assert model.score(X_test, y_test) >= 0.9
--- a/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/test_self_training.py
+++ b/venv/lib/python3.12/site-packages/sklearn/semi_supervised/tests/test_self_training.py
@@ -0,0 +1,395 @@
+from math import ceil
+
+import numpy as np
+import pytest
+from numpy.testing import assert_array_equal
+
+from sklearn.datasets import load_iris, make_blobs
+from sklearn.ensemble import StackingClassifier
+from sklearn.exceptions import NotFittedError
+from sklearn.metrics import accuracy_score
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.semi_supervised import SelfTrainingClassifier
+from sklearn.svm import SVC
+from sklearn.tests.test_pipeline import SimpleEstimator
+from sklearn.tree import DecisionTreeClassifier
+
+# Authors: The scikit-learn developers
+# SPDX-License-Identifier: BSD-3-Clause
+
+# Load the iris dataset and split it into train and test sets with a fixed seed.
+iris = load_iris()
+X_train, X_test, y_train, y_test = train_test_split(
+    iris.data, iris.target, random_state=0
+)
+
+# Only the first `n_labeled_samples` training samples keep their label.
+n_labeled_samples = 50
+
+# Integer targets where the remaining samples are marked unlabeled (-1).
+y_train_missing_labels = y_train.copy()
+y_train_missing_labels[n_labeled_samples:] = -1
+# Same targets with string class names, stored as an object array. The mapping
+# first turns -1 into the string "-1"; the last line restores the integer -1
+# for the unlabeled samples.
+mapping = {0: "A", 1: "B", 2: "C", -1: "-1"}
+y_train_missing_strings = np.vectorize(mapping.get)(y_train_missing_labels).astype(
+    object
+)
+y_train_missing_strings[y_train_missing_labels == -1] = -1
+
+
+def test_warns_k_best():
+    st = SelfTrainingClassifier(KNeighborsClassifier(), criterion="k_best", k_best=1000)
+    with pytest.warns(UserWarning, match="k_best is larger than"):
+        st.fit(X_train, y_train_missing_labels)
+
+    assert st.termination_condition_ == "all_labeled"
+
+
+@pytest.mark.parametrize(
+    "estimator",
+    [KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)],
+)
+@pytest.mark.parametrize("selection_crit", ["threshold", "k_best"])
+def test_classification(estimator, selection_crit):
+    # Check classification for various parameter settings.
+    # Also assert that predictions for strings and numerical labels are equal.
+    # NOTE(review): multioutput targets are not exercised by this test.
+    threshold = 0.75
+    max_iter = 10
+    st = SelfTrainingClassifier(
+        estimator, max_iter=max_iter, threshold=threshold, criterion=selection_crit
+    )
+    st.fit(X_train, y_train_missing_labels)
+    pred = st.predict(X_test)
+    proba = st.predict_proba(X_test)
+
+    # Same configuration, fitted on the string-labeled targets.
+    st_string = SelfTrainingClassifier(
+        estimator, max_iter=max_iter, criterion=selection_crit, threshold=threshold
+    )
+    st_string.fit(X_train, y_train_missing_strings)
+    pred_string = st_string.predict(X_test)
+    proba_string = st_string.predict_proba(X_test)
+
+    # Integer predictions mapped to their string names must match exactly.
+    assert_array_equal(np.vectorize(mapping.get)(pred), pred_string)
+    assert_array_equal(proba, proba_string)
+
+    assert st.termination_condition_ == st_string.termination_condition_
+    # Check consistency between labeled_iter, n_iter and max_iter
+    labeled = y_train_missing_labels != -1
+    # assert that labeled samples have labeled_iter = 0
+    assert_array_equal(st.labeled_iter_ == 0, labeled)
+    # assert that labeled samples do not change label during training
+    assert_array_equal(y_train_missing_labels[labeled], st.transduction_[labeled])
+
+    # assert that the max of the iterations is at most the total amount of
+    # iterations, which itself is at most max_iter
+    assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter
+    assert np.max(st_string.labeled_iter_) <= st_string.n_iter_ <= max_iter
+
+    # check shapes
+    assert st.labeled_iter_.shape == st.transduction_.shape
+    assert st_string.labeled_iter_.shape == st_string.transduction_.shape
+
+
+def test_k_best():
+    st = SelfTrainingClassifier(
+        KNeighborsClassifier(n_neighbors=1),
+        criterion="k_best",
+        k_best=10,
+        max_iter=None,
+    )
+    y_train_only_one_label = np.copy(y_train)
+    y_train_only_one_label[1:] = -1
+    n_samples = y_train.shape[0]
+
+    n_expected_iter = ceil((n_samples - 1) / 10)
+    st.fit(X_train, y_train_only_one_label)
+    assert st.n_iter_ == n_expected_iter
+
+    # Check labeled_iter_
+    assert np.sum(st.labeled_iter_ == 0) == 1
+    for i in range(1, n_expected_iter):
+        assert np.sum(st.labeled_iter_ == i) == 10
+    assert np.sum(st.labeled_iter_ == n_expected_iter) == (n_samples - 1) % 10
+    assert st.termination_condition_ == "all_labeled"
+
+
+def test_sanity_classification():
+    estimator = SVC(gamma="scale", probability=True)
+    estimator.fit(X_train[n_labeled_samples:], y_train[n_labeled_samples:])
+
+    st = SelfTrainingClassifier(estimator)
+    st.fit(X_train, y_train_missing_labels)
+
+    pred1, pred2 = estimator.predict(X_test), st.predict(X_test)
+    assert not np.array_equal(pred1, pred2)
+    score_supervised = accuracy_score(estimator.predict(X_test), y_test)
+    score_self_training = accuracy_score(st.predict(X_test), y_test)
+
+    assert score_self_training > score_supervised
+
+
+def test_none_iter():
+    # Check that the all samples were labeled after a 'reasonable' number of
+    # iterations.
+    st = SelfTrainingClassifier(KNeighborsClassifier(), threshold=0.55, max_iter=None)
+    st.fit(X_train, y_train_missing_labels)
+
+    assert st.n_iter_ < 10
+    assert st.termination_condition_ == "all_labeled"
+
+
+@pytest.mark.parametrize(
+    "estimator",
+    [KNeighborsClassifier(), SVC(gamma="scale", probability=True, random_state=0)],
+)
+@pytest.mark.parametrize("y", [y_train_missing_labels, y_train_missing_strings])
+def test_zero_iterations(estimator, y):
+    # Check classification for zero iterations.
+    # Fitting a SelfTrainingClassifier with zero iterations should give the
+    # same results as fitting a supervised classifier.
+    # This also asserts that string arrays work as expected.
+
+    clf1 = SelfTrainingClassifier(estimator, max_iter=0)
+
+    clf1.fit(X_train, y)
+
+    clf2 = estimator.fit(X_train[:n_labeled_samples], y[:n_labeled_samples])
+
+    assert_array_equal(clf1.predict(X_test), clf2.predict(X_test))
+    assert clf1.termination_condition_ == "max_iter"
+
+
+def test_prefitted_throws_error():
+    # Test that passing a pre-fitted classifier and calling predict throws an
+    # error
+    knn = KNeighborsClassifier()
+    knn.fit(X_train, y_train)
+    st = SelfTrainingClassifier(knn)
+    with pytest.raises(
+        NotFittedError,
+        match="This SelfTrainingClassifier instance is not fitted yet",
+    ):
+        st.predict(X_train)
+
+
+@pytest.mark.parametrize("max_iter", range(1, 5))
+def test_labeled_iter(max_iter):
+    # Check that the amount of datapoints labeled in iteration 0 is equal to
+    # the amount of labeled datapoints we passed.
+    st = SelfTrainingClassifier(KNeighborsClassifier(), max_iter=max_iter)
+
+    st.fit(X_train, y_train_missing_labels)
+    amount_iter_0 = len(st.labeled_iter_[st.labeled_iter_ == 0])
+    assert amount_iter_0 == n_labeled_samples
+    # Check that the max of the iterations is less than the total amount of
+    # iterations
+    assert np.max(st.labeled_iter_) <= st.n_iter_ <= max_iter
+
+
+def test_no_unlabeled():
+    # Test that training on a fully labeled dataset produces the same results
+    # as training the classifier by itself.
+    knn = KNeighborsClassifier()
+    knn.fit(X_train, y_train)
+    st = SelfTrainingClassifier(knn)
+    with pytest.warns(UserWarning, match="y contains no unlabeled samples"):
+        st.fit(X_train, y_train)
+    assert_array_equal(knn.predict(X_test), st.predict(X_test))
+    # Assert that all samples were labeled in iteration 0 (since there were no
+    # unlabeled samples).
+    assert np.all(st.labeled_iter_ == 0)
+    assert st.termination_condition_ == "all_labeled"
+
+
+def test_early_stopping():
+    svc = SVC(gamma="scale", probability=True)
+    st = SelfTrainingClassifier(svc)
+    X_train_easy = [[1], [0], [1], [0.5]]
+    y_train_easy = [1, 0, -1, -1]
+    # X = [[0.5]] cannot be predicted on with a high confidence, so training
+    # stops early
+    st.fit(X_train_easy, y_train_easy)
+    assert st.n_iter_ == 1
+    assert st.termination_condition_ == "no_change"
+
+
+def test_strings_dtype():
+    clf = SelfTrainingClassifier(KNeighborsClassifier())
+    X, y = make_blobs(n_samples=30, random_state=0, cluster_std=0.1)
+    labels_multiclass = ["one", "two", "three"]
+
+    y_strings = np.take(labels_multiclass, y)
+
+    with pytest.raises(ValueError, match="dtype"):
+        clf.fit(X, y_strings)
+
+
+@pytest.mark.parametrize("verbose", [True, False])
+def test_verbose(capsys, verbose):
+    clf = SelfTrainingClassifier(KNeighborsClassifier(), verbose=verbose)
+    clf.fit(X_train, y_train_missing_labels)
+
+    captured = capsys.readouterr()
+
+    if verbose:
+        assert "iteration" in captured.out
+    else:
+        assert "iteration" not in captured.out
+
+
+def test_verbose_k_best(capsys):
+    st = SelfTrainingClassifier(
+        KNeighborsClassifier(n_neighbors=1),
+        criterion="k_best",
+        k_best=10,
+        verbose=True,
+        max_iter=None,
+    )
+
+    y_train_only_one_label = np.copy(y_train)
+    y_train_only_one_label[1:] = -1
+    n_samples = y_train.shape[0]
+
+    n_expected_iter = ceil((n_samples - 1) / 10)
+    st.fit(X_train, y_train_only_one_label)
+
+    captured = capsys.readouterr()
+
+    msg = "End of iteration {}, added {} new labels."
+    for i in range(1, n_expected_iter):
+        assert msg.format(i, 10) in captured.out
+
+    assert msg.format(n_expected_iter, (n_samples - 1) % 10) in captured.out
+
+
+def test_k_best_selects_best():
+    # Tests that the labels added by st really are the 10 best labels.
+    svc = SVC(gamma="scale", probability=True, random_state=0)
+    st = SelfTrainingClassifier(svc, criterion="k_best", max_iter=1, k_best=10)
+    has_label = y_train_missing_labels != -1
+    st.fit(X_train, y_train_missing_labels)
+
+    got_label = ~has_label & (st.transduction_ != -1)
+
+    svc.fit(X_train[has_label], y_train_missing_labels[has_label])
+    pred = svc.predict_proba(X_train[~has_label])
+    max_proba = np.max(pred, axis=1)
+
+    most_confident_svc = X_train[~has_label][np.argsort(max_proba)[-10:]]
+    added_by_st = X_train[np.where(got_label)].tolist()
+
+    for row in most_confident_svc.tolist():
+        assert row in added_by_st
+
+
+def test_estimator_meta_estimator():
+    # Check that a meta-estimator relying on an estimator implementing
+    # `predict_proba` will work even if it does not expose this method before being
+    # fitted.
+    # Non-regression test for:
+    # https://github.com/scikit-learn/scikit-learn/issues/19119
+
+    estimator = StackingClassifier(
+        estimators=[
+            ("svc_1", SVC(probability=True)),
+            ("svc_2", SVC(probability=True)),
+        ],
+        final_estimator=SVC(probability=True),
+        cv=2,
+    )
+
+    assert hasattr(estimator, "predict_proba")
+    clf = SelfTrainingClassifier(estimator=estimator)
+    clf.fit(X_train, y_train_missing_labels)
+    clf.predict_proba(X_test)
+
+    estimator = StackingClassifier(
+        estimators=[
+            ("svc_1", SVC(probability=False)),
+            ("svc_2", SVC(probability=False)),
+        ],
+        final_estimator=SVC(probability=False),
+        cv=2,
+    )
+
+    assert not hasattr(estimator, "predict_proba")
+    clf = SelfTrainingClassifier(estimator=estimator)
+    with pytest.raises(AttributeError):
+        clf.fit(X_train, y_train_missing_labels)
+
+
+def test_self_training_estimator_attribute_error():
+    """Check that we raise the proper AttributeErrors when the `estimator`
+    does not implement the `predict_proba` method, which is called from within
+    `fit`, or `decision_function`, which is decorated with `available_if`.
+
+    Non-regression test for:
+    https://github.com/scikit-learn/scikit-learn/issues/28108
+    """
+    # `SVC` with `probability=False` does not implement 'predict_proba' that
+    # is required internally in `fit` of `SelfTrainingClassifier`. We expect
+    # an AttributeError to be raised.
+    estimator = SVC(probability=False, gamma="scale")
+    self_training = SelfTrainingClassifier(estimator)
+
+    with pytest.raises(AttributeError, match="has no attribute 'predict_proba'"):
+        self_training.fit(X_train, y_train_missing_labels)
+
+    # `DecisionTreeClassifier` does not implement 'decision_function' and
+    # should raise an AttributeError
+    self_training = SelfTrainingClassifier(estimator=DecisionTreeClassifier())
+
+    outer_msg = "This 'SelfTrainingClassifier' has no attribute 'decision_function'"
+    inner_msg = "'DecisionTreeClassifier' object has no attribute 'decision_function'"
+    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
+        self_training.fit(X_train, y_train_missing_labels).decision_function(X_train)
+    assert isinstance(exec_info.value.__cause__, AttributeError)
+    assert inner_msg in str(exec_info.value.__cause__)
+
+
+# TODO(1.8): remove in 1.8
+def test_deprecation_warning_base_estimator():
+    warn_msg = "`base_estimator` has been deprecated in 1.6 and will be removed"
+    with pytest.warns(FutureWarning, match=warn_msg):
+        SelfTrainingClassifier(base_estimator=DecisionTreeClassifier()).fit(
+            X_train, y_train_missing_labels
+        )
+
+    error_msg = "You must pass an estimator to SelfTrainingClassifier"
+    with pytest.raises(ValueError, match=error_msg):
+        SelfTrainingClassifier().fit(X_train, y_train_missing_labels)
+
+    error_msg = "You must pass only one estimator to SelfTrainingClassifier."
+    with pytest.raises(ValueError, match=error_msg):
+        SelfTrainingClassifier(
+            base_estimator=DecisionTreeClassifier(), estimator=DecisionTreeClassifier()
+        ).fit(X_train, y_train_missing_labels)
+
+
+# Metadata routing tests
+# =================================================================
+
+
+@pytest.mark.filterwarnings("ignore:y contains no unlabeled samples:UserWarning")
+@pytest.mark.parametrize(
+    "method", ["decision_function", "predict_log_proba", "predict_proba", "predict"]
+)
+def test_routing_passed_metadata_not_supported(method):
+    """Test that the right error message is raised when metadata is passed while
+    not supported when `enable_metadata_routing=False`."""
+    est = SelfTrainingClassifier(estimator=SimpleEstimator())
+    with pytest.raises(
+        ValueError, match="is only supported if enable_metadata_routing=True"
+    ):
+        est.fit([[1], [1]], [1, 1], sample_weight=[1], prop="a")
+
+    est = SelfTrainingClassifier(estimator=SimpleEstimator())
+    with pytest.raises(
+        ValueError, match="is only supported if enable_metadata_routing=True"
+    ):
+        # make sure that the estimator thinks it is already fitted
+        est.fitted_params_ = True
+        getattr(est, method)([[1]], sample_weight=[1], prop="a")
+
+
+# End of routing tests
+# ====================