add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,54 @@
"""Matrix decomposition algorithms.
These include PCA, NMF, ICA, and more. Most of the algorithms of this module can be
regarded as dimensionality reduction techniques.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from ..utils.extmath import randomized_svd
from ._dict_learning import (
DictionaryLearning,
MiniBatchDictionaryLearning,
SparseCoder,
dict_learning,
dict_learning_online,
sparse_encode,
)
from ._factor_analysis import FactorAnalysis
from ._fastica import FastICA, fastica
from ._incremental_pca import IncrementalPCA
from ._kernel_pca import KernelPCA
from ._lda import LatentDirichletAllocation
from ._nmf import (
NMF,
MiniBatchNMF,
non_negative_factorization,
)
from ._pca import PCA
from ._sparse_pca import MiniBatchSparsePCA, SparsePCA
from ._truncated_svd import TruncatedSVD
# Public API of the decomposition module: every estimator/function listed here
# is importable via `from sklearn.decomposition import *` and is what the
# documentation build enumerates for this subpackage.
__all__ = [
    "NMF",
    "PCA",
    "DictionaryLearning",
    "FactorAnalysis",
    "FastICA",
    "IncrementalPCA",
    "KernelPCA",
    "LatentDirichletAllocation",
    "MiniBatchDictionaryLearning",
    "MiniBatchNMF",
    "MiniBatchSparsePCA",
    "SparseCoder",
    "SparsePCA",
    "TruncatedSVD",
    "dict_learning",
    "dict_learning_online",
    "fastica",
    "non_negative_factorization",
    "randomized_svd",
    "sparse_encode",
]

View File

@@ -0,0 +1,202 @@
"""Principal Component Analysis Base Classes"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from abc import ABCMeta, abstractmethod
import numpy as np
from scipy import linalg
from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin
from ..utils._array_api import _fill_or_add_to_diagonal, device, get_namespace
from ..utils.validation import check_is_fitted, validate_data
class _BasePCA(
    ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator, metaclass=ABCMeta
):
    """Base class for PCA methods.

    Warning: This class should not be used directly.
    Use derived classes instead.
    """

    def get_covariance(self):
        """Compute data covariance with the generative model.

        ``cov = components_.T * S**2 * components_ + sigma2 * eye(n_features)``
        where S**2 contains the explained variances, and sigma2 contains the
        noise variances.

        Returns
        -------
        cov : array of shape=(n_features, n_features)
            Estimated covariance of data.
        """
        xp, _ = get_namespace(self.components_)

        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            # Undo the whitening rescaling so components carry their variance.
            components_ = components_ * xp.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = exp_var - self.noise_variance_
        # Clamp negative differences (components explaining less than the
        # noise floor) to exactly zero.
        exp_var_diff = xp.where(
            exp_var > self.noise_variance_,
            exp_var_diff,
            xp.asarray(0.0, device=device(exp_var), dtype=exp_var.dtype),
        )
        cov = (components_.T * exp_var_diff) @ components_
        _fill_or_add_to_diagonal(cov, self.noise_variance_, xp)
        return cov

    def get_precision(self):
        """Compute data precision matrix with the generative model.

        Equals the inverse of the covariance but computed with
        the matrix inversion lemma for efficiency.

        Returns
        -------
        precision : array, shape=(n_features, n_features)
            Estimated precision of data.
        """
        xp, is_array_api_compliant = get_namespace(self.components_)

        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components_ == 0:
            return xp.eye(n_features) / self.noise_variance_

        if is_array_api_compliant:
            linalg_inv = xp.linalg.inv
        else:
            linalg_inv = linalg.inv

        if self.noise_variance_ == 0.0:
            return linalg_inv(self.get_covariance())

        # Get precision using matrix inversion lemma
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * xp.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = exp_var - self.noise_variance_
        exp_var_diff = xp.where(
            exp_var > self.noise_variance_,
            exp_var_diff,
            # Bug fix: specify dtype (as in get_covariance) so float32 models
            # are not silently upcast to float64 by the namespace default.
            xp.asarray(0.0, device=device(exp_var), dtype=exp_var.dtype),
        )
        precision = components_ @ components_.T / self.noise_variance_
        _fill_or_add_to_diagonal(precision, 1.0 / exp_var_diff, xp)
        precision = components_.T @ linalg_inv(precision) @ components_
        precision /= -(self.noise_variance_**2)
        _fill_or_add_to_diagonal(precision, 1.0 / self.noise_variance_, xp)
        return precision

    @abstractmethod
    def fit(self, X, y=None):
        """Placeholder for fit. Subclasses should implement this method!

        Fit the model with X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        self : object
            Returns the instance itself.
        """

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like of shape (n_samples, n_components)
            Projection of X in the first principal components, where `n_samples`
            is the number of samples and `n_components` is the number of the components.
        """
        # Bug fix: check fitted state *before* touching fitted attributes so an
        # unfitted estimator raises NotFittedError rather than AttributeError
        # (get_namespace reads self.components_).
        check_is_fitted(self)

        xp, _ = get_namespace(X, self.components_, self.explained_variance_)

        X = validate_data(
            self,
            X,
            dtype=[xp.float64, xp.float32],
            accept_sparse=("csr", "csc"),
            reset=False,
        )
        return self._transform(X, xp=xp, x_is_centered=False)

    def _transform(self, X, xp, x_is_centered=False):
        X_transformed = X @ self.components_.T
        if not x_is_centered:
            # Apply the centering after the projection.
            # For dense X this avoids copying or mutating the data passed by
            # the caller.
            # For sparse X it keeps sparsity and avoids having to wrap X into
            # a linear operator.
            X_transformed -= xp.reshape(self.mean_, (1, -1)) @ self.components_.T
        if self.whiten:
            # For some solvers (such as "arpack" and "covariance_eigh"), on
            # rank deficient data, some components can have a variance
            # arbitrarily close to zero, leading to non-finite results when
            # whitening. To avoid this problem we clip the variance below.
            scale = xp.sqrt(self.explained_variance_)
            min_scale = xp.finfo(scale.dtype).eps
            scale[scale < min_scale] = min_scale
            X_transformed /= scale
        return X_transformed

    def inverse_transform(self, X):
        """Transform data back to its original space.

        In other words, return an input `X_original` whose transform would be X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data, where `n_samples` is the number of samples
            and `n_components` is the number of components.

        Returns
        -------
        X_original : array-like of shape (n_samples, n_features)
            Original data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Notes
        -----
        If whitening is enabled, inverse_transform will compute the
        exact inverse operation, which includes reversing whitening.
        """
        xp, _ = get_namespace(X)

        if self.whiten:
            # Rescale components by their standard deviation to undo whitening.
            scaled_components = (
                xp.sqrt(self.explained_variance_[:, np.newaxis]) * self.components_
            )
            return X @ scaled_components + self.mean_
        else:
            return X @ self.components_ + self.mean_

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

View File

@@ -0,0 +1,38 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from cython cimport floating
from libc.math cimport fabs
def _update_cdnmf_fast(floating[:, ::1] W, floating[:, :] HHt,
                       floating[:, :] XHt, Py_ssize_t[::1] permutation):
    """One sweep of cyclic coordinate descent for an NMF factor.

    Updates ``W`` in place, one column at a time in the order given by
    ``permutation``, and returns the accumulated projected-gradient
    violation, which the caller can use as a stopping criterion.
    """
    cdef:
        floating violation = 0
        Py_ssize_t n_components = W.shape[1]
        Py_ssize_t n_samples = W.shape[0]  # n_features for H update
        floating grad, pg, hess
        Py_ssize_t i, r, s, t

    with nogil:
        for s in range(n_components):
            t = permutation[s]

            for i in range(n_samples):
                # gradient = GW[t, i] where GW = np.dot(W, HHt) - XHt
                grad = -XHt[i, t]

                for r in range(n_components):
                    grad += HHt[t, r] * W[i, r]

                # projected gradient: at the boundary W[i, t] == 0 only a
                # negative gradient can move the iterate, so positive parts
                # do not count toward the violation there.
                pg = min(0., grad) if W[i, t] == 0 else grad
                violation += fabs(pg)

                # Hessian (diagonal element of HHt); one projected Newton
                # step, clipped at 0 to keep W non-negative.
                hess = HHt[t, t]
                if hess != 0:
                    W[i, t] = max(W[i, t] - grad / hess, 0.)

    return violation

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,457 @@
"""Factor Analysis.
A latent linear variable model.
FactorAnalysis is similar to probabilistic PCA implemented by PCA.score
While PCA assumes Gaussian noise with the same variance for each
feature, the FactorAnalysis model assumes different variances for
each of them.
This implementation is based on David Barber's Book,
Bayesian Reasoning and Machine Learning,
http://www.cs.ucl.ac.uk/staff/d.barber/brml,
Algorithm 21.1
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from math import log, sqrt
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..exceptions import ConvergenceWarning
from ..utils import check_random_state
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import _randomized_svd, fast_logdet, squared_norm
from ..utils.validation import check_is_fitted, validate_data
class FactorAnalysis(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Factor Analysis (FA).

    A simple linear generative model with Gaussian latent variables.

    The observations are assumed to be caused by a linear transformation of
    lower dimensional latent factors and added Gaussian noise.
    Without loss of generality the factors are distributed according to a
    Gaussian with zero mean and unit covariance. The noise is also zero mean
    and has an arbitrary diagonal covariance matrix.

    If we would restrict the model further, by assuming that the Gaussian
    noise is even isotropic (all diagonal entries are the same) we would obtain
    :class:`PCA`.

    FactorAnalysis performs a maximum likelihood estimate of the so-called
    `loading` matrix, the transformation of the latent variables to the
    observed ones, using SVD based approach.

    Read more in the :ref:`User Guide <FA>`.

    .. versionadded:: 0.13

    Parameters
    ----------
    n_components : int, default=None
        Dimensionality of latent space, the number of components
        of ``X`` that are obtained after ``transform``.
        If None, n_components is set to the number of features.

    tol : float, default=1e-2
        Stopping tolerance for log-likelihood increase.

    copy : bool, default=True
        Whether to make a copy of X. If ``False``, the input X gets overwritten
        during fitting.

    max_iter : int, default=1000
        Maximum number of iterations.

    noise_variance_init : array-like of shape (n_features,), default=None
        The initial guess of the noise variance for each feature.
        If None, it defaults to np.ones(n_features).

    svd_method : {'lapack', 'randomized'}, default='randomized'
        Which SVD method to use. If 'lapack' use standard SVD from
        scipy.linalg, if 'randomized' use fast ``randomized_svd`` function.
        Defaults to 'randomized'. For most applications 'randomized' will
        be sufficiently precise while providing significant speed gains.
        Accuracy can also be improved by setting higher values for
        `iterated_power`. If this is not sufficient, for maximum precision
        you should choose 'lapack'.

    iterated_power : int, default=3
        Number of iterations for the power method. 3 by default. Only used
        if ``svd_method`` equals 'randomized'.

    rotation : {'varimax', 'quartimax'}, default=None
        If not None, apply the indicated rotation. Currently, varimax and
        quartimax are implemented. See
        `"The varimax criterion for analytic rotation in factor analysis"
        <https://link.springer.com/article/10.1007%2FBF02289233>`_
        H. F. Kaiser, 1958.

        .. versionadded:: 0.24

    random_state : int or RandomState instance, default=0
        Only used when ``svd_method`` equals 'randomized'. Pass an int for
        reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Components with maximum variance.

    loglike_ : list of shape (n_iterations,)
        The log likelihood at each iteration.

    noise_variance_ : ndarray of shape (n_features,)
        The estimated noise variance for each feature.

    n_iter_ : int
        Number of iterations run.

    mean_ : ndarray of shape (n_features,)
        Per-feature empirical mean, estimated from the training set.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PCA: Principal component analysis is also a latent linear variable model
        which however assumes equal noise variance for each feature.
        This extra assumption makes probabilistic PCA faster as it can be
        computed in closed form.
    FastICA: Independent component analysis, a latent variable model with
        non-Gaussian latent variables.

    References
    ----------
    - David Barber, Bayesian Reasoning and Machine Learning,
      Algorithm 21.1.
    - Christopher M. Bishop: Pattern Recognition and Machine Learning,
      Chapter 12.2.4.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.decomposition import FactorAnalysis
    >>> X, _ = load_digits(return_X_y=True)
    >>> transformer = FactorAnalysis(n_components=7, random_state=0)
    >>> X_transformed = transformer.fit_transform(X)
    >>> X_transformed.shape
    (1797, 7)
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 0, None, closed="left"), None],
        "tol": [Interval(Real, 0.0, None, closed="left")],
        "copy": ["boolean"],
        "max_iter": [Interval(Integral, 1, None, closed="left")],
        "noise_variance_init": ["array-like", None],
        "svd_method": [StrOptions({"randomized", "lapack"})],
        "iterated_power": [Interval(Integral, 0, None, closed="left")],
        "rotation": [StrOptions({"varimax", "quartimax"}), None],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        n_components=None,
        *,
        tol=1e-2,
        copy=True,
        max_iter=1000,
        noise_variance_init=None,
        svd_method="randomized",
        iterated_power=3,
        rotation=None,
        random_state=0,
    ):
        self.n_components = n_components
        self.copy = copy
        self.tol = tol
        self.max_iter = max_iter
        self.svd_method = svd_method
        self.noise_variance_init = noise_variance_init
        self.iterated_power = iterated_power
        self.random_state = random_state
        self.rotation = rotation

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the FactorAnalysis model to X using SVD based approach.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : Ignored
            Ignored parameter.

        Returns
        -------
        self : object
            FactorAnalysis class instance.
        """
        X = validate_data(
            self, X, copy=self.copy, dtype=np.float64, force_writeable=True
        )

        n_samples, n_features = X.shape
        n_components = self.n_components
        if n_components is None:
            n_components = n_features

        self.mean_ = np.mean(X, axis=0)
        X -= self.mean_

        # some constant terms
        nsqrt = sqrt(n_samples)
        llconst = n_features * log(2.0 * np.pi) + n_components
        var = np.var(X, axis=0)

        if self.noise_variance_init is None:
            psi = np.ones(n_features, dtype=X.dtype)
        else:
            if len(self.noise_variance_init) != n_features:
                # Bug fix: the previous message read "dimension does not with
                # number of features" (missing verb).
                raise ValueError(
                    "noise_variance_init dimension does not match the "
                    "number of features : %d != %d"
                    % (len(self.noise_variance_init), n_features)
                )
            psi = np.array(self.noise_variance_init)

        loglike = []
        old_ll = -np.inf
        SMALL = 1e-12

        # we'll modify svd outputs to return unexplained variance
        # to allow for unified computation of loglikelihood
        if self.svd_method == "lapack":

            def my_svd(X):
                _, s, Vt = linalg.svd(X, full_matrices=False, check_finite=False)
                return (
                    s[:n_components],
                    Vt[:n_components],
                    squared_norm(s[n_components:]),
                )

        else:  # svd_method == "randomized"
            random_state = check_random_state(self.random_state)

            def my_svd(X):
                _, s, Vt = _randomized_svd(
                    X,
                    n_components,
                    random_state=random_state,
                    n_iter=self.iterated_power,
                )
                return s, Vt, squared_norm(X) - squared_norm(s)

        for i in range(self.max_iter):
            # SMALL helps numerics
            sqrt_psi = np.sqrt(psi) + SMALL
            s, Vt, unexp_var = my_svd(X / (sqrt_psi * nsqrt))
            s **= 2
            # Use 'maximum' here to avoid sqrt problems.
            W = np.sqrt(np.maximum(s - 1.0, 0.0))[:, np.newaxis] * Vt
            del Vt
            W *= sqrt_psi

            # loglikelihood
            ll = llconst + np.sum(np.log(s))
            ll += unexp_var + np.sum(np.log(psi))
            ll *= -n_samples / 2.0
            loglike.append(ll)
            if (ll - old_ll) < self.tol:
                break
            old_ll = ll

            psi = np.maximum(var - np.sum(W**2, axis=0), SMALL)
        else:
            # for/else: only reached when max_iter was exhausted without break.
            warnings.warn(
                "FactorAnalysis did not converge."
                " You might want"
                " to increase the number of iterations.",
                ConvergenceWarning,
            )

        self.components_ = W
        if self.rotation is not None:
            self.components_ = self._rotate(W)
        self.noise_variance_ = psi
        self.loglike_ = loglike
        self.n_iter_ = i + 1
        return self

    def transform(self, X):
        """Apply dimensionality reduction to X using the model.

        Compute the expected mean of the latent variables.
        See Barber, 21.2.33 (or Bishop, 12.66).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            The latent variables of X.
        """
        check_is_fitted(self)

        X = validate_data(self, X, reset=False)

        Ih = np.eye(len(self.components_))

        X_transformed = X - self.mean_

        Wpsi = self.components_ / self.noise_variance_
        cov_z = linalg.inv(Ih + np.dot(Wpsi, self.components_.T))
        tmp = np.dot(X_transformed, Wpsi.T)
        X_transformed = np.dot(tmp, cov_z)

        return X_transformed

    def get_covariance(self):
        """Compute data covariance with the FactorAnalysis model.

        ``cov = components_.T * components_ + diag(noise_variance)``

        Returns
        -------
        cov : ndarray of shape (n_features, n_features)
            Estimated covariance of data.
        """
        check_is_fitted(self)

        cov = np.dot(self.components_.T, self.components_)
        cov.flat[:: len(cov) + 1] += self.noise_variance_  # modify diag inplace
        return cov

    def get_precision(self):
        """Compute data precision matrix with the FactorAnalysis model.

        Returns
        -------
        precision : ndarray of shape (n_features, n_features)
            Estimated precision of data.
        """
        check_is_fitted(self)

        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components == 0:
            return np.diag(1.0 / self.noise_variance_)
        if self.n_components == n_features:
            return linalg.inv(self.get_covariance())

        # Get precision using matrix inversion lemma
        components_ = self.components_
        precision = np.dot(components_ / self.noise_variance_, components_.T)
        precision.flat[:: len(precision) + 1] += 1.0
        precision = np.dot(components_.T, np.dot(linalg.inv(precision), components_))
        precision /= self.noise_variance_[:, np.newaxis]
        precision /= -self.noise_variance_[np.newaxis, :]
        precision.flat[:: len(precision) + 1] += 1.0 / self.noise_variance_
        return precision

    def score_samples(self, X):
        """Compute the log-likelihood of each sample.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The data.

        Returns
        -------
        ll : ndarray of shape (n_samples,)
            Log-likelihood of each sample under the current model.
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        Xr = X - self.mean_
        precision = self.get_precision()
        n_features = X.shape[1]
        log_like = -0.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))
        return log_like

    def score(self, X, y=None):
        """Compute the average log-likelihood of the samples.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            The data.

        y : Ignored
            Ignored parameter.

        Returns
        -------
        ll : float
            Average log-likelihood of the samples under the current model.
        """
        return np.mean(self.score_samples(X))

    def _rotate(self, components, n_components=None, tol=1e-6):
        "Rotate the factor analysis solution."
        # note that tol is not exposed
        return _ortho_rotation(components.T, method=self.rotation, tol=tol)[
            : self.n_components
        ]

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]
def _ortho_rotation(components, method="varimax", tol=1e-6, max_iter=100):
"""Return rotated components."""
nrow, ncol = components.shape
rotation_matrix = np.eye(ncol)
var = 0
for _ in range(max_iter):
comp_rot = np.dot(components, rotation_matrix)
if method == "varimax":
tmp = comp_rot * np.transpose((comp_rot**2).sum(axis=0) / nrow)
elif method == "quartimax":
tmp = 0
u, s, v = np.linalg.svd(np.dot(components.T, comp_rot**3 - tmp))
rotation_matrix = np.dot(u, v)
var_new = np.sum(s)
if var != 0 and var_new < var * (1 + tol):
break
var = var_new
return np.dot(components, rotation_matrix).T

View File

@@ -0,0 +1,804 @@
"""
Python implementation of the fast ICA algorithms.
Reference: Tables 8.3 and 8.4 page 196 in the book:
Independent Component Analysis, by Hyvarinen et al.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..exceptions import ConvergenceWarning
from ..utils import as_float_array, check_array, check_random_state
from ..utils._param_validation import Interval, Options, StrOptions, validate_params
from ..utils.validation import check_is_fitted, validate_data
__all__ = ["FastICA", "fastica"]
def _gs_decorrelation(w, W, j):
"""
Orthonormalize w wrt the first j rows of W.
Parameters
----------
w : ndarray of shape (n,)
Array to be orthogonalized
W : ndarray of shape (p, n)
Null space definition
j : int < p
The no of (from the first) rows of Null space W wrt which w is
orthogonalized.
Notes
-----
Assumes that W is orthogonal
w changed in place
"""
w -= np.linalg.multi_dot([w, W[:j].T, W[:j]])
return w
def _sym_decorrelation(W):
"""Symmetric decorrelation
i.e. W <- (W * W.T) ^{-1/2} * W
"""
s, u = linalg.eigh(np.dot(W, W.T))
# Avoid sqrt of negative values because of rounding errors. Note that
# np.sqrt(tiny) is larger than tiny and therefore this clipping also
# prevents division by zero in the next step.
s = np.clip(s, a_min=np.finfo(W.dtype).tiny, a_max=None)
# u (resp. s) contains the eigenvectors (resp. square roots of
# the eigenvalues) of W * W.T
return np.linalg.multi_dot([u * (1.0 / np.sqrt(s)), u.T, W])
def _ica_def(X, tol, g, fun_args, max_iter, w_init):
    """Deflationary FastICA using a contrast-function approximation to
    neg-entropy; extracts one component at a time.

    Used internally by FastICA.
    """
    n_components = w_init.shape[0]
    W = np.zeros((n_components, n_components), dtype=X.dtype)
    iters_per_component = []

    # j is the index of the component being extracted.
    for j in range(n_components):
        # Start from the j-th initial vector, normalized to unit length.
        w = w_init[j, :].copy()
        w /= np.sqrt((w**2).sum())

        for i in range(max_iter):
            gwtx, g_wtx = g(np.dot(w.T, X), fun_args)

            # Fixed-point update, then deflation against previously
            # extracted rows and renormalization.
            w_new = (X * gwtx).mean(axis=1) - g_wtx.mean() * w
            _gs_decorrelation(w_new, W, j)
            w_new /= np.sqrt((w_new**2).sum())

            # Convergence: |cos(angle between successive iterates)| -> 1.
            lim = np.abs(np.abs((w_new * w).sum()) - 1)
            w = w_new
            if lim < tol:
                break

        iters_per_component.append(i + 1)
        W[j, :] = w

    return W, max(iters_per_component)
def _ica_par(X, tol, g, fun_args, max_iter, w_init):
    """Parallel FastICA: update all unmixing rows simultaneously.

    Used internally by FastICA -- main loop.
    """
    W = _sym_decorrelation(w_init)
    del w_init
    n_cols = float(X.shape[1])

    for ii in range(max_iter):
        gwtx, g_wtx = g(np.dot(W, X), fun_args)
        W_next = _sym_decorrelation(
            np.dot(gwtx, X.T) / n_cols - g_wtx[:, np.newaxis] * W
        )
        del gwtx, g_wtx
        # builtin max, abs are faster than numpy counter parts.
        # np.einsum allows having the lowest memory footprint.
        # It is faster than np.diag(np.dot(W_next, W.T)).
        lim = max(abs(abs(np.einsum("ij,ij->i", W_next, W)) - 1))
        W = W_next
        if lim < tol:
            break
    else:
        # for/else: only reached when max_iter was exhausted without break.
        warnings.warn(
            (
                "FastICA did not converge. Consider increasing "
                "tolerance or the maximum number of iterations."
            ),
            ConvergenceWarning,
        )

    return W, ii + 1
# Some standard non-linear functions.
# XXX: these should be optimized, as they can be a bottleneck.
def _logcosh(x, fun_args=None):
alpha = fun_args.get("alpha", 1.0) # comment it out?
x *= alpha
gx = np.tanh(x, x) # apply the tanh inplace
g_x = np.empty(x.shape[0], dtype=x.dtype)
# XXX compute in chunks to avoid extra allocation
for i, gx_i in enumerate(gx): # please don't vectorize.
g_x[i] = (alpha * (1 - gx_i**2)).mean()
return gx, g_x
def _exp(x, fun_args):
exp = np.exp(-(x**2) / 2)
gx = x * exp
g_x = (1 - x**2) * exp
return gx, g_x.mean(axis=-1)
def _cube(x, fun_args):
return x**3, (3 * x**2).mean(axis=-1)
@validate_params(
    {
        "X": ["array-like"],
        "return_X_mean": ["boolean"],
        "compute_sources": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=False,
)
def fastica(
    X,
    n_components=None,
    *,
    algorithm="parallel",
    whiten="unit-variance",
    fun="logcosh",
    fun_args=None,
    max_iter=200,
    tol=1e-04,
    w_init=None,
    whiten_solver="svd",
    random_state=None,
    return_X_mean=False,
    compute_sources=True,
    return_n_iter=False,
):
    """Perform Fast Independent Component Analysis.

    The implementation is based on [1]_.

    Read more in the :ref:`User Guide <ICA>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples and
        `n_features` is the number of features.

    n_components : int, default=None
        Number of components to use. If None is passed, all are used.

    algorithm : {'parallel', 'deflation'}, default='parallel'
        Specify which algorithm to use for FastICA.

    whiten : str or bool, default='unit-variance'
        Specify the whitening strategy to use.

        - If 'arbitrary-variance', a whitening with variance
          arbitrary is used.
        - If 'unit-variance', the whitening matrix is rescaled to ensure that
          each recovered source has unit variance.
        - If False, the data is already considered to be whitened, and no
          whitening is performed.

        .. versionchanged:: 1.3
            The default value of `whiten` changed to 'unit-variance' in 1.3.

    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'
        The functional form of the G function used in the
        approximation to neg-entropy. Could be either 'logcosh', 'exp',
        or 'cube'.
        You can also provide your own function. It should return a tuple
        containing the value of the function, and of its derivative, in the
        point. The derivative should be averaged along its last dimension.
        Example::

            def my_g(x):
                return x ** 3, (3 * x ** 2).mean(axis=-1)

    fun_args : dict, default=None
        Arguments to send to the functional form.
        If empty or None and if fun='logcosh', fun_args will take value
        {'alpha' : 1.0}.

    max_iter : int, default=200
        Maximum number of iterations to perform.

    tol : float, default=1e-4
        A positive scalar giving the tolerance at which the
        un-mixing matrix is considered to have converged.

    w_init : ndarray of shape (n_components, n_components), default=None
        Initial un-mixing array. If `w_init=None`, then an array of values
        drawn from a normal distribution is used.

    whiten_solver : {"eigh", "svd"}, default="svd"
        The solver to use for whitening.

        - "svd" is more stable numerically if the problem is degenerate, and
          often faster when `n_samples <= n_features`.
        - "eigh" is generally more memory efficient when
          `n_samples >= n_features`, and can be faster when
          `n_samples >= 50 * n_features`.

        .. versionadded:: 1.2

    random_state : int, RandomState instance or None, default=None
        Used to initialize ``w_init`` when not specified, with a
        normal distribution. Pass an int, for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    return_X_mean : bool, default=False
        If True, X_mean is returned too.

    compute_sources : bool, default=True
        If False, sources are not computed, but only the rotation matrix.
        This can save memory when working with big data. Defaults to True.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    K : ndarray of shape (n_components, n_features) or None
        If whiten is 'True', K is the pre-whitening matrix that projects data
        onto the first n_components principal components. If whiten is 'False',
        K is 'None'.

    W : ndarray of shape (n_components, n_components)
        The square matrix that unmixes the data after whitening.
        The mixing matrix is the pseudo-inverse of matrix ``W K``
        if K is not None, else it is the inverse of W.

    S : ndarray of shape (n_samples, n_components) or None
        Estimated source matrix.

    X_mean : ndarray of shape (n_features,)
        The mean over features. Returned only if return_X_mean is True.

    n_iter : int
        If the algorithm is "deflation", n_iter is the
        maximum number of iterations run across all components. Else
        they are just the number of iterations taken to converge. This is
        returned only when return_n_iter is set to `True`.

    Notes
    -----
    The data matrix X is considered to be a linear combination of
    non-Gaussian (independent) components i.e. X = AS where columns of S
    contain the independent components and A is a linear mixing
    matrix. In short ICA attempts to `un-mix' the data by estimating an
    un-mixing matrix W where ``S = W K X.``
    While FastICA was proposed to estimate as many sources
    as features, it is possible to estimate less by setting
    n_components < n_features. It this case K is not a square matrix
    and the estimated A is the pseudo-inverse of ``W K``.

    This implementation was originally made for data of shape
    [n_features, n_samples]. Now the input is transposed
    before the algorithm is applied. This makes it slightly
    faster for Fortran-ordered input.

    References
    ----------
    .. [1] A. Hyvarinen and E. Oja, "Fast Independent Component Analysis",
           Algorithms and Applications, Neural Networks, 13(4-5), 2000,
           pp. 411-430.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.decomposition import fastica
    >>> X, _ = load_digits(return_X_y=True)
    >>> K, W, S = fastica(X, n_components=7, random_state=0, whiten='unit-variance')
    >>> K.shape
    (7, 64)
    >>> W.shape
    (7, 7)
    >>> S.shape
    (1797, 7)
    """
    # Thin functional wrapper: delegate all the work to the FastICA
    # estimator and unpack its fitted attributes.
    est = FastICA(
        n_components=n_components,
        algorithm=algorithm,
        whiten=whiten,
        fun=fun,
        fun_args=fun_args,
        max_iter=max_iter,
        tol=tol,
        w_init=w_init,
        whiten_solver=whiten_solver,
        random_state=random_state,
    )
    # Parameters were already checked by the @validate_params decorator for
    # the wrapper-specific ones; run the estimator's own validation for the
    # rest before calling the private fitting path.
    est._validate_params()
    S = est._fit_transform(X, compute_sources=compute_sources)

    # Whitening-dependent outputs: K and X_mean only exist when the
    # estimator actually whitened the data.
    if est.whiten in ["unit-variance", "arbitrary-variance"]:
        K = est.whitening_
        X_mean = est.mean_
    else:
        K = None
        X_mean = None

    returned_values = [K, est._unmixing, S]
    if return_X_mean:
        returned_values.append(X_mean)
    if return_n_iter:
        returned_values.append(est.n_iter_)

    return returned_values
class FastICA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """FastICA: a fast algorithm for Independent Component Analysis.

    The implementation is based on [1]_.

    Read more in the :ref:`User Guide <ICA>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of components to use. If None is passed, all are used.

    algorithm : {'parallel', 'deflation'}, default='parallel'
        Specify which algorithm to use for FastICA.

    whiten : str or bool, default='unit-variance'
        Specify the whitening strategy to use.

        - If 'arbitrary-variance', a whitening with variance
          arbitrary is used.
        - If 'unit-variance', the whitening matrix is rescaled to ensure that
          each recovered source has unit variance.
        - If False, the data is already considered to be whitened, and no
          whitening is performed.

        .. versionchanged:: 1.3
            The default value of `whiten` changed to 'unit-variance' in 1.3.

    fun : {'logcosh', 'exp', 'cube'} or callable, default='logcosh'
        The functional form of the G function used in the
        approximation to neg-entropy. Could be either 'logcosh', 'exp',
        or 'cube'.
        You can also provide your own function. It should return a tuple
        containing the value of the function, and of its derivative, in the
        point. The derivative should be averaged along its last dimension.
        Example::

            def my_g(x):
                return x ** 3, (3 * x ** 2).mean(axis=-1)

    fun_args : dict, default=None
        Arguments to send to the functional form.
        If empty or None and if fun='logcosh', fun_args will take value
        {'alpha' : 1.0}.

    max_iter : int, default=200
        Maximum number of iterations during fit.

    tol : float, default=1e-4
        A positive scalar giving the tolerance at which the
        un-mixing matrix is considered to have converged.

    w_init : array-like of shape (n_components, n_components), default=None
        Initial un-mixing array. If `w_init=None`, then an array of values
        drawn from a normal distribution is used.

    whiten_solver : {"eigh", "svd"}, default="svd"
        The solver to use for whitening.

        - "svd" is more stable numerically if the problem is degenerate, and
          often faster when `n_samples <= n_features`.
        - "eigh" is generally more memory efficient when
          `n_samples >= n_features`, and can be faster when
          `n_samples >= 50 * n_features`.

        .. versionadded:: 1.2

    random_state : int, RandomState instance or None, default=None
        Used to initialize ``w_init`` when not specified, with a
        normal distribution. Pass an int, for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        The linear operator to apply to the data to get the independent
        sources. This is equal to the unmixing matrix when ``whiten`` is
        False, and equal to ``np.dot(unmixing_matrix, self.whitening_)`` when
        ``whiten`` is True.

    mixing_ : ndarray of shape (n_features, n_components)
        The pseudo-inverse of ``components_``. It is the linear operator
        that maps independent sources to the data.

    mean_ : ndarray of shape(n_features,)
        The mean over features. Only set if `self.whiten` is True.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    n_iter_ : int
        If the algorithm is "deflation", n_iter is the
        maximum number of iterations run across all components. Else
        they are just the number of iterations taken to converge.

    whitening_ : ndarray of shape (n_components, n_features)
        Only set if whiten is 'True'. This is the pre-whitening matrix
        that projects data onto the first `n_components` principal components.

    See Also
    --------
    PCA : Principal component analysis (PCA).
    IncrementalPCA : Incremental principal components analysis (IPCA).
    KernelPCA : Kernel Principal component analysis (KPCA).
    MiniBatchSparsePCA : Mini-batch Sparse Principal Components Analysis.
    SparsePCA : Sparse Principal Components Analysis (SparsePCA).

    References
    ----------
    .. [1] A. Hyvarinen and E. Oja, Independent Component Analysis:
           Algorithms and Applications, Neural Networks, 13(4-5), 2000,
           pp. 411-430.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.decomposition import FastICA
    >>> X, _ = load_digits(return_X_y=True)
    >>> transformer = FastICA(n_components=7,
    ...         random_state=0,
    ...         whiten='unit-variance')
    >>> X_transformed = transformer.fit_transform(X)
    >>> X_transformed.shape
    (1797, 7)
    """

    # Declarative parameter validation used by `_fit_context` before fitting.
    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left"), None],
        "algorithm": [StrOptions({"parallel", "deflation"})],
        "whiten": [
            StrOptions({"arbitrary-variance", "unit-variance"}),
            Options(bool, {False}),
        ],
        "fun": [StrOptions({"logcosh", "exp", "cube"}), callable],
        "fun_args": [dict, None],
        "max_iter": [Interval(Integral, 1, None, closed="left")],
        "tol": [Interval(Real, 0.0, None, closed="left")],
        "w_init": ["array-like", None],
        "whiten_solver": [StrOptions({"eigh", "svd"})],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        n_components=None,
        *,
        algorithm="parallel",
        whiten="unit-variance",
        fun="logcosh",
        fun_args=None,
        max_iter=200,
        tol=1e-4,
        w_init=None,
        whiten_solver="svd",
        random_state=None,
    ):
        # Only store the constructor arguments; all validation and work
        # happens at fit time, per scikit-learn convention.
        super().__init__()
        self.n_components = n_components
        self.algorithm = algorithm
        self.whiten = whiten
        self.fun = fun
        self.fun_args = fun_args
        self.max_iter = max_iter
        self.tol = tol
        self.w_init = w_init
        self.whiten_solver = whiten_solver
        self.random_state = random_state

    def _fit_transform(self, X, compute_sources=False):
        """Fit the model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        compute_sources : bool, default=False
            If False, sources are not computes but only the rotation matrix.
            This can save memory when working with big data. Defaults to False.

        Returns
        -------
        S : ndarray of shape (n_samples, n_components) or None
            Sources matrix. `None` if `compute_sources` is `False`.
        """
        # Work on the transposed array (n_features, n_samples): the FastICA
        # update rules below treat observations as columns. A copy is forced
        # when whitening because XT is centered in place further down.
        XT = validate_data(
            self,
            X,
            copy=self.whiten,
            dtype=[np.float64, np.float32],
            ensure_min_samples=2,
        ).T
        fun_args = {} if self.fun_args is None else self.fun_args
        random_state = check_random_state(self.random_state)

        alpha = fun_args.get("alpha", 1.0)
        if not 1 <= alpha <= 2:
            raise ValueError("alpha must be in [1,2]")

        # Resolve the contrast function G used in the neg-entropy
        # approximation; a user callable is wrapped so it receives fun_args.
        if self.fun == "logcosh":
            g = _logcosh
        elif self.fun == "exp":
            g = _exp
        elif self.fun == "cube":
            g = _cube
        elif callable(self.fun):

            def g(x, fun_args):
                return self.fun(x, **fun_args)

        n_features, n_samples = XT.shape
        n_components = self.n_components
        if not self.whiten and n_components is not None:
            # Without whitening there is no dimensionality reduction step,
            # so a user-specified n_components cannot be honored.
            n_components = None
            warnings.warn("Ignoring n_components with whiten=False.")

        if n_components is None:
            n_components = min(n_samples, n_features)
        if n_components > min(n_samples, n_features):
            # At most min(n_samples, n_features) directions exist.
            n_components = min(n_samples, n_features)
            warnings.warn(
                "n_components is too large: it will be set to %s" % n_components
            )

        if self.whiten:
            # Centering the features of X
            X_mean = XT.mean(axis=-1)
            XT -= X_mean[:, np.newaxis]

            # Whitening and preprocessing by PCA
            if self.whiten_solver == "eigh":
                # Faster when num_samples >> n_features
                # NOTE: XT is centered while X is the raw input; the product
                # still equals the centered scatter matrix because each
                # centered row of XT sums to zero.
                d, u = linalg.eigh(XT.dot(X))
                sort_indices = np.argsort(d)[::-1]
                eps = np.finfo(d.dtype).eps * 10
                degenerate_idx = d < eps
                if np.any(degenerate_idx):
                    warnings.warn(
                        "There are some small singular values, using "
                        "whiten_solver = 'svd' might lead to more "
                        "accurate results."
                    )
                d[degenerate_idx] = eps  # For numerical issues
                # Eigenvalues of the scatter matrix -> singular values of XT.
                np.sqrt(d, out=d)
                d, u = d[sort_indices], u[:, sort_indices]
            elif self.whiten_solver == "svd":
                u, d = linalg.svd(XT, full_matrices=False, check_finite=False)[:2]

            # Give consistent eigenvectors for both svd solvers
            u *= np.sign(u[0])

            K = (u / d).T[:n_components]  # see (6.33) p.140
            del u, d
            X1 = np.dot(K, XT)
            # see (13.6) p.267 Here X1 is white and data
            # in X has been projected onto a subspace by PCA
            X1 *= np.sqrt(n_samples)
        else:
            # X must be casted to floats to avoid typing issues with numpy
            # 2.0 and the line below
            X1 = as_float_array(XT, copy=False)  # copy has been taken care of

        w_init = self.w_init
        if w_init is None:
            # Random square starting point for the unmixing matrix, matching
            # the dtype of the (possibly float32) working array.
            w_init = np.asarray(
                random_state.normal(size=(n_components, n_components)), dtype=X1.dtype
            )
        else:
            w_init = np.asarray(w_init)
            if w_init.shape != (n_components, n_components):
                raise ValueError(
                    "w_init has invalid shape -- should be %(shape)s"
                    % {"shape": (n_components, n_components)}
                )

        kwargs = {
            "tol": self.tol,
            "g": g,
            "fun_args": fun_args,
            "max_iter": self.max_iter,
            "w_init": w_init,
        }

        # Core FastICA iterations: symmetric (parallel) or one-by-one
        # (deflation) estimation of the unmixing matrix W.
        if self.algorithm == "parallel":
            W, n_iter = _ica_par(X1, **kwargs)
        elif self.algorithm == "deflation":
            W, n_iter = _ica_def(X1, **kwargs)
        del X1

        self.n_iter_ = n_iter

        if compute_sources:
            if self.whiten:
                S = np.linalg.multi_dot([W, K, XT]).T
            else:
                S = np.dot(W, XT).T
        else:
            S = None

        if self.whiten:
            if self.whiten == "unit-variance":
                # Sources are needed to measure their spread even when the
                # caller did not ask for them.
                if not compute_sources:
                    S = np.linalg.multi_dot([W, K, XT]).T

                # Rescale each source to unit variance and fold the same
                # scaling into the unmixing matrix so transform() agrees.
                S_std = np.std(S, axis=0, keepdims=True)
                S /= S_std
                W /= S_std.T

            self.components_ = np.dot(W, K)
            self.mean_ = X_mean
            self.whitening_ = K
        else:
            self.components_ = W

        self.mixing_ = linalg.pinv(self.components_, check_finite=False)
        self._unmixing = W

        return S

    @_fit_context(prefer_skip_nested_validation=True)
    def fit_transform(self, X, y=None):
        """Fit the model and recover the sources from X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Estimated sources obtained by transforming the data with the
            estimated unmixing matrix.
        """
        return self._fit_transform(X, compute_sources=True)

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model to X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Skip source computation to save memory; only the matrices needed
        # by transform()/inverse_transform() are kept.
        self._fit_transform(X, compute_sources=False)
        return self

    def transform(self, X, copy=True):
        """Recover the sources from X (apply the unmixing matrix).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data to transform, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        copy : bool, default=True
            If False, data passed to fit can be overwritten. Defaults to True.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Estimated sources obtained by transforming the data with the
            estimated unmixing matrix.
        """
        check_is_fitted(self)

        # Copy only when we are about to center in place below.
        X = validate_data(
            self,
            X,
            copy=(copy and self.whiten),
            dtype=[np.float64, np.float32],
            reset=False,
        )
        if self.whiten:
            # Remove the training mean before unmixing.
            X -= self.mean_

        return np.dot(X, self.components_.T)

    def inverse_transform(self, X, copy=True):
        """Transform the sources back to the mixed data (apply mixing matrix).

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            Sources, where `n_samples` is the number of samples
            and `n_components` is the number of components.
        copy : bool, default=True
            If False, data passed to fit are overwritten. Defaults to True.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            Reconstructed data obtained with the mixing matrix.
        """
        check_is_fitted(self)

        X = check_array(X, copy=(copy and self.whiten), dtype=[np.float64, np.float32])
        X = np.dot(X, self.mixing_.T)
        if self.whiten:
            # Re-add the training mean removed during fitting.
            X += self.mean_

        return X

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        # The estimator preserves float32/float64 input dtypes.
        tags.transformer_tags.preserves_dtype = ["float64", "float32"]
        return tags

View File

@@ -0,0 +1,426 @@
"""Incremental Principal Components Analysis."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral
import numpy as np
from scipy import linalg, sparse
from sklearn.utils import metadata_routing
from ..base import _fit_context
from ..utils import gen_batches
from ..utils._param_validation import Interval
from ..utils.extmath import _incremental_mean_and_var, svd_flip
from ..utils.validation import validate_data
from ._base import _BasePCA
class IncrementalPCA(_BasePCA):
    """Incremental principal components analysis (IPCA).

    Linear dimensionality reduction using Singular Value Decomposition of
    the data, keeping only the most significant singular vectors to
    project the data to a lower dimensional space. The input data is centered
    but not scaled for each feature before applying the SVD.

    Depending on the size of the input data, this algorithm can be much more
    memory efficient than a PCA, and allows sparse input.

    This algorithm has constant memory complexity, on the order
    of ``batch_size * n_features``, enabling use of np.memmap files without
    loading the entire file into memory. For sparse matrices, the input
    is converted to dense in batches (in order to be able to subtract the
    mean) which avoids storing the entire dense matrix at any one time.

    The computational overhead of each SVD is
    ``O(batch_size * n_features ** 2)``, but only 2 * batch_size samples
    remain in memory at a time. There will be ``n_samples / batch_size`` SVD
    computations to get the principal components, versus 1 large SVD of
    complexity ``O(n_samples * n_features ** 2)`` for PCA.

    For a usage example, see
    :ref:`sphx_glr_auto_examples_decomposition_plot_incremental_pca.py`.

    Read more in the :ref:`User Guide <IncrementalPCA>`.

    .. versionadded:: 0.16

    Parameters
    ----------
    n_components : int, default=None
        Number of components to keep. If ``n_components`` is ``None``,
        then ``n_components`` is set to ``min(n_samples, n_features)``.

    whiten : bool, default=False
        When True (False by default) the ``components_`` vectors are divided
        by ``n_samples`` times ``components_`` to ensure uncorrelated outputs
        with unit component-wise variances.

        Whitening will remove some information from the transformed signal
        (the relative variance scales of the components) but can sometimes
        improve the predictive accuracy of the downstream estimators by
        making data respect some hard-wired assumptions.

    copy : bool, default=True
        If False, X will be overwritten. ``copy=False`` can be used to
        save memory but is unsafe for general use.

    batch_size : int, default=None
        The number of samples to use for each batch. Only used when calling
        ``fit``. If ``batch_size`` is ``None``, then ``batch_size``
        is inferred from the data and set to ``5 * n_features``, to provide a
        balance between approximation accuracy and memory consumption.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Principal axes in feature space, representing the directions of
        maximum variance in the data. Equivalently, the right singular
        vectors of the centered input data, parallel to its eigenvectors.
        The components are sorted by decreasing ``explained_variance_``.

    explained_variance_ : ndarray of shape (n_components,)
        Variance explained by each of the selected components.

    explained_variance_ratio_ : ndarray of shape (n_components,)
        Percentage of variance explained by each of the selected components.
        If all components are stored, the sum of explained variances is equal
        to 1.0.

    singular_values_ : ndarray of shape (n_components,)
        The singular values corresponding to each of the selected components.
        The singular values are equal to the 2-norms of the ``n_components``
        variables in the lower-dimensional space.

    mean_ : ndarray of shape (n_features,)
        Per-feature empirical mean, aggregate over calls to ``partial_fit``.

    var_ : ndarray of shape (n_features,)
        Per-feature empirical variance, aggregate over calls to
        ``partial_fit``.

    noise_variance_ : float
        The estimated noise covariance following the Probabilistic PCA model
        from Tipping and Bishop 1999. See "Pattern Recognition and
        Machine Learning" by C. Bishop, 12.2.1 p. 574 or
        http://www.miketipping.com/papers/met-mppca.pdf.

    n_components_ : int
        The estimated number of components. Relevant when
        ``n_components=None``.

    n_samples_seen_ : int
        The number of samples processed by the estimator. Will be reset on
        new calls to fit, but increments across ``partial_fit`` calls.

    batch_size_ : int
        Inferred batch size from ``batch_size``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PCA : Principal component analysis (PCA).
    KernelPCA : Kernel Principal component analysis (KPCA).
    SparsePCA : Sparse Principal Components Analysis (SparsePCA).
    TruncatedSVD : Dimensionality reduction using truncated SVD.

    Notes
    -----
    Implements the incremental PCA model from:
    *D. Ross, J. Lim, R. Lin, M. Yang, Incremental Learning for Robust Visual
    Tracking, International Journal of Computer Vision, Volume 77, Issue 1-3,
    pp. 125-141, May 2008.*
    See https://www.cs.toronto.edu/~dross/ivt/RossLimLinYang_ijcv.pdf

    This model is an extension of the Sequential Karhunen-Loeve Transform from:
    :doi:`A. Levy and M. Lindenbaum, Sequential Karhunen-Loeve Basis Extraction and
    its Application to Images, IEEE Transactions on Image Processing, Volume 9,
    Number 8, pp. 1371-1374, August 2000. <10.1109/83.855432>`

    We have specifically abstained from an optimization used by authors of both
    papers, a QR decomposition used in specific situations to reduce the
    algorithmic complexity of the SVD. The source for this technique is
    *Matrix Computations, Third Edition, G. Holub and C. Van Loan, Chapter 5,
    section 5.4.4, pp 252-253.*. This technique has been omitted because it is
    advantageous only when decomposing a matrix with ``n_samples`` (rows)
    >= 5/3 * ``n_features`` (columns), and hurts the readability of the
    implemented algorithm. This would be a good opportunity for future
    optimization, if it is deemed necessary.

    References
    ----------
    D. Ross, J. Lim, R. Lin, M. Yang. Incremental Learning for Robust Visual
    Tracking, International Journal of Computer Vision, Volume 77,
    Issue 1-3, pp. 125-141, May 2008.

    G. Golub and C. Van Loan. Matrix Computations, Third Edition, Chapter 5,
    Section 5.4.4, pp. 252-253.

    Examples
    --------
    >>> from sklearn.datasets import load_digits
    >>> from sklearn.decomposition import IncrementalPCA
    >>> from scipy import sparse
    >>> X, _ = load_digits(return_X_y=True)
    >>> transformer = IncrementalPCA(n_components=7, batch_size=200)
    >>> # either partially fit on smaller batches of data
    >>> transformer.partial_fit(X[:100, :])
    IncrementalPCA(batch_size=200, n_components=7)
    >>> # or let the fit function itself divide the data into batches
    >>> X_sparse = sparse.csr_matrix(X)
    >>> X_transformed = transformer.fit_transform(X_sparse)
    >>> X_transformed.shape
    (1797, 7)
    """

    # `check_input` is a plumbing flag between fit and partial_fit, not a
    # metadata-routing parameter.
    __metadata_request__partial_fit = {"check_input": metadata_routing.UNUSED}

    # Declarative parameter validation used by `_fit_context`.
    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left"), None],
        "whiten": ["boolean"],
        "copy": ["boolean"],
        "batch_size": [Interval(Integral, 1, None, closed="left"), None],
    }

    def __init__(self, n_components=None, *, whiten=False, copy=True, batch_size=None):
        # Only store constructor arguments; work happens at fit time.
        self.n_components = n_components
        self.whiten = whiten
        self.copy = copy
        self.batch_size = batch_size

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X, using minibatches of size batch_size.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Reset all fitted state: fit starts from scratch, unlike
        # partial_fit which accumulates across calls.
        self.components_ = None
        self.n_samples_seen_ = 0
        self.mean_ = 0.0
        self.var_ = 0.0
        self.singular_values_ = None
        self.explained_variance_ = None
        self.explained_variance_ratio_ = None
        self.noise_variance_ = None

        X = validate_data(
            self,
            X,
            accept_sparse=["csr", "csc", "lil"],
            copy=self.copy,
            dtype=[np.float64, np.float32],
            force_writeable=True,
        )
        n_samples, n_features = X.shape

        if self.batch_size is None:
            # Heuristic default trading accuracy against memory.
            self.batch_size_ = 5 * n_features
        else:
            self.batch_size_ = self.batch_size

        # Feed the data to partial_fit batch by batch; sparse batches are
        # densified one at a time so the full dense matrix never exists.
        for batch in gen_batches(
            n_samples, self.batch_size_, min_batch_size=self.n_components or 0
        ):
            X_batch = X[batch]
            if sparse.issparse(X_batch):
                X_batch = X_batch.toarray()
            self.partial_fit(X_batch, check_input=False)

        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def partial_fit(self, X, y=None, check_input=True):
        """Incremental fit with X. All of X is processed as a single batch.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : Ignored
            Not used, present for API consistency by convention.

        check_input : bool, default=True
            Run check_array on X.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        first_pass = not hasattr(self, "components_")

        if check_input:
            if sparse.issparse(X):
                raise TypeError(
                    "IncrementalPCA.partial_fit does not support "
                    "sparse input. Either convert data to dense "
                    "or use IncrementalPCA.fit to do so in batches."
                )
            X = validate_data(
                self,
                X,
                copy=self.copy,
                dtype=[np.float64, np.float32],
                force_writeable=True,
                reset=first_pass,
            )
        n_samples, n_features = X.shape
        if first_pass:
            self.components_ = None

        # Determine (and validate) the effective number of components.
        if self.n_components is None:
            if self.components_ is None:
                self.n_components_ = min(n_samples, n_features)
            else:
                self.n_components_ = self.components_.shape[0]
        elif not self.n_components <= n_features:
            raise ValueError(
                "n_components=%r invalid for n_features=%d, need "
                "more rows than columns for IncrementalPCA "
                "processing" % (self.n_components, n_features)
            )
        elif self.n_components > n_samples and first_pass:
            raise ValueError(
                f"n_components={self.n_components} must be less or equal to "
                f"the batch number of samples {n_samples} for the first "
                "partial_fit call."
            )
        else:
            self.n_components_ = self.n_components

        # NOTE: guards against changing the component count between
        # partial_fit calls (e.g. n_components=None with varying batches).
        if (self.components_ is not None) and (
            self.components_.shape[0] != self.n_components_
        ):
            raise ValueError(
                "Number of input features has changed from %i "
                "to %i between calls to partial_fit! Try "
                "setting n_components to a fixed value."
                % (self.components_.shape[0], self.n_components_)
            )

        # This is the first partial_fit
        if not hasattr(self, "n_samples_seen_"):
            self.n_samples_seen_ = 0
            self.mean_ = 0.0
            self.var_ = 0.0

        # Update stats - they are 0 if this is the first step
        col_mean, col_var, n_total_samples = _incremental_mean_and_var(
            X,
            last_mean=self.mean_,
            last_variance=self.var_,
            last_sample_count=np.repeat(self.n_samples_seen_, X.shape[1]),
        )
        n_total_samples = n_total_samples[0]

        # Whitening
        if self.n_samples_seen_ == 0:
            # If it is the first step, simply whiten X
            X -= col_mean
        else:
            col_batch_mean = np.mean(X, axis=0)
            X -= col_batch_mean
            # Build matrix of combined previous basis and new data
            mean_correction = np.sqrt(
                (self.n_samples_seen_ / n_total_samples) * n_samples
            ) * (self.mean_ - col_batch_mean)
            # Stack the previous (scaled) basis, the centered batch and a
            # mean-correction row so one SVD updates the decomposition.
            X = np.vstack(
                (
                    self.singular_values_.reshape((-1, 1)) * self.components_,
                    X,
                    mean_correction,
                )
            )

        U, S, Vt = linalg.svd(X, full_matrices=False, check_finite=False)
        # Sign convention: flip signs so output is deterministic.
        U, Vt = svd_flip(U, Vt, u_based_decision=False)
        explained_variance = S**2 / (n_total_samples - 1)
        # Denominator is the total variance of all samples seen so far.
        explained_variance_ratio = S**2 / np.sum(col_var * n_total_samples)

        self.n_samples_seen_ = n_total_samples
        self.components_ = Vt[: self.n_components_]
        self.singular_values_ = S[: self.n_components_]
        self.mean_ = col_mean
        self.var_ = col_var
        self.explained_variance_ = explained_variance[: self.n_components_]
        self.explained_variance_ratio_ = explained_variance_ratio[: self.n_components_]
        # we already checked `self.n_components <= n_samples` above
        if self.n_components_ not in (n_samples, n_features):
            # Average variance of the discarded components (PPCA noise).
            self.noise_variance_ = explained_variance[self.n_components_ :].mean()
        else:
            self.noise_variance_ = 0.0
        return self

    def transform(self, X):
        """Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set, using minibatches of size batch_size if X is
        sparse.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Projection of X in the first principal components.

        Examples
        --------
        >>> import numpy as np
        >>> from sklearn.decomposition import IncrementalPCA
        >>> X = np.array([[-1, -1], [-2, -1], [-3, -2],
        ...               [1, 1], [2, 1], [3, 2]])
        >>> ipca = IncrementalPCA(n_components=2, batch_size=3)
        >>> ipca.fit(X)
        IncrementalPCA(batch_size=3, n_components=2)
        >>> ipca.transform(X) # doctest: +SKIP
        """
        if sparse.issparse(X):
            # Densify batch by batch so the full dense matrix never exists.
            n_samples = X.shape[0]
            output = []
            for batch in gen_batches(
                n_samples, self.batch_size_, min_batch_size=self.n_components or 0
            ):
                output.append(super().transform(X[batch].toarray()))
            return np.vstack(output)
        else:
            return super().transform(X)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        # Beware that fit accepts sparse data but partial_fit doesn't
        tags.input_tags.sparse = True
        return tags

View File

@@ -0,0 +1,579 @@
"""Kernel Principal Components Analysis."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from scipy.linalg import eigh
from scipy.sparse.linalg import eigsh
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..exceptions import NotFittedError
from ..metrics.pairwise import pairwise_kernels
from ..preprocessing import KernelCenterer
from ..utils._arpack import _init_arpack_v0
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import _randomized_eigsh, svd_flip
from ..utils.validation import (
_check_psd_eigenvalues,
check_is_fitted,
validate_data,
)
class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
"""Kernel Principal component analysis (KPCA).
Non-linear dimensionality reduction through the use of kernels [1]_, see also
:ref:`metrics`.
It uses the :func:`scipy.linalg.eigh` LAPACK implementation of the full SVD
or the :func:`scipy.sparse.linalg.eigsh` ARPACK implementation of the
truncated SVD, depending on the shape of the input data and the number of
components to extract. It can also use a randomized truncated SVD by the
method proposed in [3]_, see `eigen_solver`.
For a usage example and comparison between
Principal Components Analysis (PCA) and its kernelized version (KPCA), see
:ref:`sphx_glr_auto_examples_decomposition_plot_kernel_pca.py`.
For a usage example in denoising images using KPCA, see
:ref:`sphx_glr_auto_examples_applications_plot_digits_denoising.py`.
Read more in the :ref:`User Guide <kernel_PCA>`.
Parameters
----------
n_components : int, default=None
Number of components. If None, all non-zero components are kept.
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'cosine', 'precomputed'} \
or callable, default='linear'
Kernel used for PCA.
gamma : float, default=None
Kernel coefficient for rbf, poly and sigmoid kernels. Ignored by other
kernels. If ``gamma`` is ``None``, then it is set to ``1/n_features``.
degree : float, default=3
Degree for poly kernels. Ignored by other kernels.
coef0 : float, default=1
Independent term in poly and sigmoid kernels.
Ignored by other kernels.
kernel_params : dict, default=None
Parameters (keyword arguments) and
values for kernel passed as callable object.
Ignored by other kernels.
alpha : float, default=1.0
Hyperparameter of the ridge regression that learns the
inverse transform (when fit_inverse_transform=True).
fit_inverse_transform : bool, default=False
Learn the inverse transform for non-precomputed kernels
(i.e. learn to find the pre-image of a point). This method is based
on [2]_.
eigen_solver : {'auto', 'dense', 'arpack', 'randomized'}, \
default='auto'
Select eigensolver to use. If `n_components` is much
less than the number of training samples, randomized (or arpack to a
smaller extent) may be more efficient than the dense eigensolver.
Randomized SVD is performed according to the method of Halko et al
[3]_.
auto :
the solver is selected by a default policy based on n_samples
(the number of training samples) and `n_components`:
if the number of components to extract is less than 10 (strict) and
the number of samples is more than 200 (strict), the 'arpack'
method is enabled. Otherwise the exact full eigenvalue
decomposition is computed and optionally truncated afterwards
('dense' method).
dense :
run exact full eigenvalue decomposition calling the standard
LAPACK solver via `scipy.linalg.eigh`, and select the components
by postprocessing
arpack :
run SVD truncated to n_components calling ARPACK solver using
`scipy.sparse.linalg.eigsh`. It requires strictly
0 < n_components < n_samples
randomized :
run randomized SVD by the method of Halko et al. [3]_. The current
implementation selects eigenvalues based on their module; therefore
using this method can lead to unexpected results if the kernel is
not positive semi-definite. See also [4]_.
.. versionchanged:: 1.0
`'randomized'` was added.
tol : float, default=0
Convergence tolerance for arpack.
If 0, optimal value will be chosen by arpack.
max_iter : int, default=None
Maximum number of iterations for arpack.
If None, optimal value will be chosen by arpack.
iterated_power : int >= 0, or 'auto', default='auto'
Number of iterations for the power method computed by
svd_solver == 'randomized'. When 'auto', it is set to 7 when
`n_components < 0.1 * min(X.shape)`, other it is set to 4.
.. versionadded:: 1.0
remove_zero_eig : bool, default=False
If True, then all components with zero eigenvalues are removed, so
that the number of components in the output may be < n_components
(and sometimes even zero due to numerical instability).
When n_components is None, this parameter is ignored and components
with zero eigenvalues are removed regardless.
random_state : int, RandomState instance or None, default=None
Used when ``eigen_solver`` == 'arpack' or 'randomized'. Pass an int
for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
.. versionadded:: 0.18
copy_X : bool, default=True
If True, input X is copied and stored by the model in the `X_fit_`
attribute. If no further changes will be done to X, setting
`copy_X=False` saves memory by storing a reference.
.. versionadded:: 0.18
n_jobs : int, default=None
The number of parallel jobs to run.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
.. versionadded:: 0.18
Attributes
----------
eigenvalues_ : ndarray of shape (n_components,)
Eigenvalues of the centered kernel matrix in decreasing order.
If `n_components` and `remove_zero_eig` are not set,
then all values are stored.
eigenvectors_ : ndarray of shape (n_samples, n_components)
Eigenvectors of the centered kernel matrix. If `n_components` and
`remove_zero_eig` are not set, then all components are stored.
dual_coef_ : ndarray of shape (n_samples, n_features)
Inverse transform matrix. Only available when
``fit_inverse_transform`` is True.
X_transformed_fit_ : ndarray of shape (n_samples, n_components)
Projection of the fitted data on the kernel principal components.
Only available when ``fit_inverse_transform`` is True.
X_fit_ : ndarray of shape (n_samples, n_features)
The data used to fit the model. If `copy_X=False`, then `X_fit_` is
a reference. This attribute is used for the calls to transform.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
gamma_ : float
Kernel coefficient for rbf, poly and sigmoid kernels. When `gamma`
is explicitly provided, this is just the same as `gamma`. When `gamma`
is `None`, this is the actual value of kernel coefficient.
.. versionadded:: 1.3
See Also
--------
FastICA : A fast algorithm for Independent Component Analysis.
IncrementalPCA : Incremental Principal Component Analysis.
NMF : Non-Negative Matrix Factorization.
PCA : Principal Component Analysis.
SparsePCA : Sparse Principal Component Analysis.
TruncatedSVD : Dimensionality reduction using truncated SVD.
References
----------
.. [1] `Schölkopf, Bernhard, Alexander Smola, and Klaus-Robert Müller.
"Kernel principal component analysis."
International conference on artificial neural networks.
Springer, Berlin, Heidelberg, 1997.
<https://people.eecs.berkeley.edu/~wainwrig/stat241b/scholkopf_kernel.pdf>`_
.. [2] `Bakır, Gökhan H., Jason Weston, and Bernhard Schölkopf.
"Learning to find pre-images."
Advances in neural information processing systems 16 (2004): 449-456.
<https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_
.. [3] :arxiv:`Halko, Nathan, Per-Gunnar Martinsson, and Joel A. Tropp.
"Finding structure with randomness: Probabilistic algorithms for
constructing approximate matrix decompositions."
SIAM review 53.2 (2011): 217-288. <0909.4061>`
.. [4] `Martinsson, Per-Gunnar, Vladimir Rokhlin, and Mark Tygert.
"A randomized algorithm for the decomposition of matrices."
Applied and Computational Harmonic Analysis 30.1 (2011): 47-68.
<https://www.sciencedirect.com/science/article/pii/S1063520310000242>`_
Examples
--------
>>> from sklearn.datasets import load_digits
>>> from sklearn.decomposition import KernelPCA
>>> X, _ = load_digits(return_X_y=True)
>>> transformer = KernelPCA(n_components=7, kernel='linear')
>>> X_transformed = transformer.fit_transform(X)
>>> X_transformed.shape
(1797, 7)
"""
_parameter_constraints: dict = {
"n_components": [
Interval(Integral, 1, None, closed="left"),
None,
],
"kernel": [
StrOptions({"linear", "poly", "rbf", "sigmoid", "cosine", "precomputed"}),
callable,
],
"gamma": [
Interval(Real, 0, None, closed="left"),
None,
],
"degree": [Interval(Real, 0, None, closed="left")],
"coef0": [Interval(Real, None, None, closed="neither")],
"kernel_params": [dict, None],
"alpha": [Interval(Real, 0, None, closed="left")],
"fit_inverse_transform": ["boolean"],
"eigen_solver": [StrOptions({"auto", "dense", "arpack", "randomized"})],
"tol": [Interval(Real, 0, None, closed="left")],
"max_iter": [
Interval(Integral, 1, None, closed="left"),
None,
],
"iterated_power": [
Interval(Integral, 0, None, closed="left"),
StrOptions({"auto"}),
],
"remove_zero_eig": ["boolean"],
"random_state": ["random_state"],
"copy_X": ["boolean"],
"n_jobs": [None, Integral],
}
def __init__(
    self,
    n_components=None,
    *,
    kernel="linear",
    gamma=None,
    degree=3,
    coef0=1,
    kernel_params=None,
    alpha=1.0,
    fit_inverse_transform=False,
    eigen_solver="auto",
    tol=0,
    max_iter=None,
    iterated_power="auto",
    remove_zero_eig=False,
    random_state=None,
    copy_X=True,
    n_jobs=None,
):
    # Per scikit-learn convention, __init__ only stores the hyper-parameters
    # verbatim; all validation and computation is deferred to `fit`.
    self.n_components = n_components
    self.kernel = kernel
    self.kernel_params = kernel_params
    self.gamma = gamma
    self.degree = degree
    self.coef0 = coef0
    self.alpha = alpha
    self.fit_inverse_transform = fit_inverse_transform
    self.eigen_solver = eigen_solver
    self.tol = tol
    self.max_iter = max_iter
    self.iterated_power = iterated_power
    self.remove_zero_eig = remove_zero_eig
    self.random_state = random_state
    self.n_jobs = n_jobs
    self.copy_X = copy_X
def _get_kernel(self, X, Y=None):
    """Evaluate the configured kernel between X and Y (or X with itself).

    A callable kernel receives ``kernel_params``; a named kernel receives
    the fitted ``gamma_`` plus ``degree`` and ``coef0`` (irrelevant ones
    are dropped by ``filter_params=True``).
    """
    if callable(self.kernel):
        kernel_kwargs = self.kernel_params if self.kernel_params is not None else {}
    else:
        kernel_kwargs = {
            "gamma": self.gamma_,
            "degree": self.degree,
            "coef0": self.coef0,
        }
    return pairwise_kernels(
        X,
        Y,
        metric=self.kernel,
        filter_params=True,
        n_jobs=self.n_jobs,
        **kernel_kwargs,
    )
def _fit_transform_in_place(self, K):
    """Fit the eigendecomposition of the kernel matrix K, centering in place.

    Sets ``eigenvalues_`` and ``eigenvectors_`` (sorted in descending
    eigenvalue order) and returns the centered kernel matrix.
    """
    # center kernel in place
    K = self._centerer.fit(K).transform(K, copy=False)
    # adjust n_components according to user inputs
    if self.n_components is None:
        n_components = K.shape[0]  # use all dimensions
    else:
        n_components = min(K.shape[0], self.n_components)
    # compute eigenvectors
    if self.eigen_solver == "auto":
        # Heuristic: iterative arpack only pays off for a large kernel
        # matrix with few requested components; otherwise dense is faster.
        if K.shape[0] > 200 and n_components < 10:
            eigen_solver = "arpack"
        else:
            eigen_solver = "dense"
    else:
        eigen_solver = self.eigen_solver
    if eigen_solver == "dense":
        # Note: subset_by_index specifies the indices of smallest/largest to return
        self.eigenvalues_, self.eigenvectors_ = eigh(
            K, subset_by_index=(K.shape[0] - n_components, K.shape[0] - 1)
        )
    elif eigen_solver == "arpack":
        # Deterministic starting vector derived from random_state.
        v0 = _init_arpack_v0(K.shape[0], self.random_state)
        self.eigenvalues_, self.eigenvectors_ = eigsh(
            K, n_components, which="LA", tol=self.tol, maxiter=self.max_iter, v0=v0
        )
    elif eigen_solver == "randomized":
        self.eigenvalues_, self.eigenvectors_ = _randomized_eigsh(
            K,
            n_components=n_components,
            n_iter=self.iterated_power,
            random_state=self.random_state,
            selection="module",
        )
    # make sure that the eigenvalues are ok and fix numerical issues
    self.eigenvalues_ = _check_psd_eigenvalues(
        self.eigenvalues_, enable_warnings=False
    )
    # flip eigenvectors' sign to enforce deterministic output
    self.eigenvectors_, _ = svd_flip(u=self.eigenvectors_, v=None)
    # sort eigenvectors in descending order
    indices = self.eigenvalues_.argsort()[::-1]
    self.eigenvalues_ = self.eigenvalues_[indices]
    self.eigenvectors_ = self.eigenvectors_[:, indices]
    # remove eigenvectors with a zero eigenvalue (null space) if required
    if self.remove_zero_eig or self.n_components is None:
        self.eigenvectors_ = self.eigenvectors_[:, self.eigenvalues_ > 0]
        self.eigenvalues_ = self.eigenvalues_[self.eigenvalues_ > 0]
    # Maintenance note on Eigenvectors normalization
    # ----------------------------------------------
    # there is a link between
    # the eigenvectors of K=Phi(X)'Phi(X) and the ones of Phi(X)Phi(X)'
    # if v is an eigenvector of K
    #     then Phi(X)v is an eigenvector of Phi(X)Phi(X)'
    # if u is an eigenvector of Phi(X)Phi(X)'
    #     then Phi(X)'u is an eigenvector of Phi(X)'Phi(X)
    #
    # At this stage our self.eigenvectors_ (the v) have norm 1, we need to scale
    # them so that eigenvectors in kernel feature space (the u) have norm=1
    # instead
    #
    # We COULD scale them here:
    #       self.eigenvectors_ = self.eigenvectors_ / np.sqrt(self.eigenvalues_)
    #
    # But choose to perform that LATER when needed, in `fit()` and in
    # `transform()`.
    return K
def _fit_inverse_transform(self, X_transformed, X):
    """Learn the pre-image map by kernel ridge regression.

    Solves ``(K + alpha * I) dual_coef_ = X`` where K is the kernel among
    the transformed training points, so that ``inverse_transform`` can map
    projections back toward input space.
    """
    if hasattr(X, "tocsr"):
        raise NotImplementedError(
            "Inverse transform not implemented for sparse matrices!"
        )
    gram = self._get_kernel(X_transformed)
    # Add the ridge penalty alpha on the diagonal (stride n+1 walks it).
    diag_stride = X_transformed.shape[0] + 1
    gram.flat[::diag_stride] += self.alpha
    self.dual_coef_ = linalg.solve(gram, X, assume_a="pos", overwrite_a=True)
    self.X_transformed_fit_ = X_transformed
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y=None):
    """Fit the model from data in X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    y : Ignored
        Not used, present for API consistency by convention.

    Returns
    -------
    self : object
        Returns the instance itself.
    """
    # Inverse transform needs access to the original feature space, which a
    # precomputed kernel does not provide.
    if self.fit_inverse_transform and self.kernel == "precomputed":
        raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.")
    X = validate_data(self, X, accept_sparse="csr", copy=self.copy_X)
    # Default gamma is 1 / n_features when not set explicitly.
    self.gamma_ = 1 / X.shape[1] if self.gamma is None else self.gamma
    self._centerer = KernelCenterer().set_output(transform="default")
    K = self._get_kernel(X)
    # When kernel="precomputed", K is X but it's safe to perform in place operations
    # on K because a copy was made before if requested by copy_X.
    self._fit_transform_in_place(K)
    if self.fit_inverse_transform:
        # no need to use the kernel to transform X, use shortcut expression
        X_transformed = self.eigenvectors_ * np.sqrt(self.eigenvalues_)
        self._fit_inverse_transform(X_transformed, X)
    self.X_fit_ = X
    return self
def fit_transform(self, X, y=None, **params):
    """Fit the model from data in X and transform X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    y : Ignored
        Not used, present for API consistency by convention.

    **params : kwargs
        Parameters (keyword arguments) and values passed to
        the fit_transform instance.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_components)
        Transformed values.
    """
    self.fit(X, **params)

    # The projection of the training data is available in closed form:
    # eigenvector i scaled by sqrt(eigenvalue i) — no kernel re-evaluation.
    projected = self.eigenvectors_ * np.sqrt(self.eigenvalues_)

    if self.fit_inverse_transform:
        self._fit_inverse_transform(projected, X)

    return projected
def transform(self, X):
    """Transform X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Training vector, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    Returns
    -------
    X_new : ndarray of shape (n_samples, n_components)
        Projection of X in the first principal components, where `n_samples`
        is the number of samples and `n_components` is the number of the components.
    """
    check_is_fitted(self)
    X = validate_data(self, X, accept_sparse="csr", reset=False)

    # Centered gram matrix between X and the training data X_fit_.
    K = self._centerer.transform(self._get_kernel(X, self.X_fit_))

    # Scale eigenvectors by 1/sqrt(eigenvalue); columns belonging to the
    # null space (zero eigenvalue) are left at zero to avoid division by 0.
    nonzero = np.flatnonzero(self.eigenvalues_)
    scaled_vectors = np.zeros_like(self.eigenvectors_)
    scaled_vectors[:, nonzero] = self.eigenvectors_[:, nonzero] / np.sqrt(
        self.eigenvalues_[nonzero]
    )

    # Project X onto the components via the kernel matrix.
    return K @ scaled_vectors
def inverse_transform(self, X):
    """Transform X back to original space.

    ``inverse_transform`` approximates the inverse transformation using
    a learned pre-image. The pre-image is learned by kernel ridge
    regression of the original data on their low-dimensional representation
    vectors.

    .. note:
        :meth:`~sklearn.decomposition.fit` internally uses a centered
        kernel. As the centered kernel no longer contains the information
        of the mean of kernel features, such information is not taken into
        account in reconstruction.

    .. note::
        When users want to compute inverse transformation for 'linear'
        kernel, it is recommended that they use
        :class:`~sklearn.decomposition.PCA` instead. Unlike
        :class:`~sklearn.decomposition.PCA`,
        :class:`~sklearn.decomposition.KernelPCA`'s ``inverse_transform``
        does not reconstruct the mean of data when 'linear' kernel is used
        due to the use of centered kernel.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_components)
        Training vector, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    Returns
    -------
    X_original : ndarray of shape (n_samples, n_features)
        Original data, where `n_samples` is the number of samples
        and `n_features` is the number of features.

    References
    ----------
    `Bakır, Gökhan H., Jason Weston, and Bernhard Schölkopf.
    "Learning to find pre-images."
    Advances in neural information processing systems 16 (2004): 449-456.
    <https://papers.nips.cc/paper/2003/file/ac1ad983e08ad3304a97e147f522747e-Paper.pdf>`_
    """
    # The pre-image map (dual_coef_) only exists when it was requested.
    if not self.fit_inverse_transform:
        raise NotFittedError(
            "The fit_inverse_transform parameter was not"
            " set to True when instantiating and hence "
            "the inverse transform is not available."
        )

    # Apply the learned kernel ridge regression: k(X, X_train_proj) @ coef.
    gram = self._get_kernel(X, self.X_transformed_fit_)
    return gram @ self.dual_coef_
def __sklearn_tags__(self):
    """Declare estimator capabilities for scikit-learn's tag system."""
    tags = super().__sklearn_tags__()
    # A precomputed kernel makes the estimator pairwise: X is (n, n).
    tags.input_tags.pairwise = self.kernel == "precomputed"
    tags.input_tags.sparse = True
    tags.transformer_tags.preserves_dtype = ["float64", "float32"]
    return tags
@property
def _n_features_out(self):
    """Number of transformed output features (one per kept eigenvalue)."""
    return len(self.eigenvalues_)

View File

@@ -0,0 +1,959 @@
"""
=============================================================
Online Latent Dirichlet Allocation with variational inference
=============================================================
This implementation is modified from Matthew D. Hoffman's onlineldavb code
Link: https://github.com/blei-lab/onlineldavb
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral, Real
import numpy as np
import scipy.sparse as sp
from joblib import effective_n_jobs
from scipy.special import gammaln, logsumexp
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..utils import check_random_state, gen_batches, gen_even_slices
from ..utils._param_validation import Interval, StrOptions
from ..utils.parallel import Parallel, delayed
from ..utils.validation import check_is_fitted, check_non_negative, validate_data
from ._online_lda_fast import (
_dirichlet_expectation_1d as cy_dirichlet_expectation_1d,
)
from ._online_lda_fast import (
_dirichlet_expectation_2d,
)
from ._online_lda_fast import (
mean_change as cy_mean_change,
)
EPS = np.finfo(float).eps
def _update_doc_distribution(
    X,
    exp_topic_word_distr,
    doc_topic_prior,
    max_doc_update_iter,
    mean_change_tol,
    cal_sstats,
    random_state,
):
    """E-step: update document-topic distribution.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    exp_topic_word_distr : ndarray of shape (n_topics, n_features)
        Exponential value of expectation of log topic word distribution.
        In the literature, this is `exp(E[log(beta)])`.

    doc_topic_prior : float
        Prior of document topic distribution `theta`.

    max_doc_update_iter : int
        Max number of iterations for updating document topic distribution in
        the E-step.

    mean_change_tol : float
        Stopping tolerance for updating document topic distribution in E-step.

    cal_sstats : bool
        Parameter that indicate to calculate sufficient statistics or not.
        Set `cal_sstats` to `True` when we need to run M-step.

    random_state : RandomState instance or None
        Parameter that indicate how to initialize document topic distribution.
        Set `random_state` to None will initialize document topic distribution
        to a constant number.

    Returns
    -------
    (doc_topic_distr, suff_stats) :
        `doc_topic_distr` is unnormalized topic distribution for each document.
        In the literature, this is `gamma`. we can calculate `E[log(theta)]`
        from it.
        `suff_stats` is expected sufficient statistics for the M-step.
            When `cal_sstats == False`, this will be None.
    """
    is_sparse_x = sp.issparse(X)
    n_samples, n_features = X.shape
    n_topics = exp_topic_word_distr.shape[0]

    # Random init during training, constant (all ones) init during scoring.
    if random_state:
        doc_topic_distr = random_state.gamma(100.0, 0.01, (n_samples, n_topics)).astype(
            X.dtype, copy=False
        )
    else:
        doc_topic_distr = np.ones((n_samples, n_topics), dtype=X.dtype)

    # In the literature, this is `exp(E[log(theta)])`
    exp_doc_topic = np.exp(_dirichlet_expectation_2d(doc_topic_distr))

    # diff on `component_` (only calculate it when `cal_diff` is True)
    suff_stats = (
        np.zeros(exp_topic_word_distr.shape, dtype=X.dtype) if cal_sstats else None
    )

    if is_sparse_x:
        X_data = X.data
        X_indices = X.indices
        X_indptr = X.indptr

    # These cython functions are called in a nested loop on usually very small arrays
    # (length=n_topics). In that case, finding the appropriate signature of the
    # fused-typed function can be more costly than its execution, hence the dispatch
    # is done outside of the loop.
    ctype = "float" if X.dtype == np.float32 else "double"
    mean_change = cy_mean_change[ctype]
    dirichlet_expectation_1d = cy_dirichlet_expectation_1d[ctype]
    eps = np.finfo(X.dtype).eps

    for idx_d in range(n_samples):
        # Extract the word ids and counts of document d (sparse or dense).
        if is_sparse_x:
            ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]
            cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]
        else:
            ids = np.nonzero(X[idx_d, :])[0]
            cnts = X[idx_d, ids]

        doc_topic_d = doc_topic_distr[idx_d, :]
        # The next one is a copy, since the inner loop overwrites it.
        exp_doc_topic_d = exp_doc_topic[idx_d, :].copy()
        exp_topic_word_d = exp_topic_word_distr[:, ids]

        # Iterate between `doc_topic_d` and `norm_phi` until convergence
        for _ in range(0, max_doc_update_iter):
            last_d = doc_topic_d

            # The optimal phi_{dwk} is proportional to
            # exp(E[log(theta_{dk})]) * exp(E[log(beta_{dw})]).
            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + eps

            doc_topic_d = exp_doc_topic_d * np.dot(cnts / norm_phi, exp_topic_word_d.T)
            # Note: adds doc_topic_prior to doc_topic_d, in-place.
            dirichlet_expectation_1d(doc_topic_d, doc_topic_prior, exp_doc_topic_d)

            if mean_change(last_d, doc_topic_d) < mean_change_tol:
                break
        doc_topic_distr[idx_d, :] = doc_topic_d

        # Contribution of document d to the expected sufficient
        # statistics for the M step.
        if cal_sstats:
            norm_phi = np.dot(exp_doc_topic_d, exp_topic_word_d) + eps
            suff_stats[:, ids] += np.outer(exp_doc_topic_d, cnts / norm_phi)

    return (doc_topic_distr, suff_stats)
class LatentDirichletAllocation(
ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator
):
"""Latent Dirichlet Allocation with online variational Bayes algorithm.
The implementation is based on [1]_ and [2]_.
.. versionadded:: 0.17
Read more in the :ref:`User Guide <LatentDirichletAllocation>`.
Parameters
----------
n_components : int, default=10
Number of topics.
.. versionchanged:: 0.19
``n_topics`` was renamed to ``n_components``
doc_topic_prior : float, default=None
Prior of document topic distribution `theta`. If the value is None,
defaults to `1 / n_components`.
In [1]_, this is called `alpha`.
topic_word_prior : float, default=None
Prior of topic word distribution `beta`. If the value is None, defaults
to `1 / n_components`.
In [1]_, this is called `eta`.
learning_method : {'batch', 'online'}, default='batch'
Method used to update `_component`. Only used in :meth:`fit` method.
In general, if the data size is large, the online update will be much
faster than the batch update.
Valid options:
- 'batch': Batch variational Bayes method. Use all training data in each EM
update. Old `components_` will be overwritten in each iteration.
- 'online': Online variational Bayes method. In each EM update, use mini-batch
of training data to update the ``components_`` variable incrementally. The
learning rate is controlled by the ``learning_decay`` and the
``learning_offset`` parameters.
.. versionchanged:: 0.20
The default learning method is now ``"batch"``.
learning_decay : float, default=0.7
It is a parameter that control learning rate in the online learning
method. The value should be set between (0.5, 1.0] to guarantee
asymptotic convergence. When the value is 0.0 and batch_size is
``n_samples``, the update method is same as batch learning. In the
literature, this is called kappa.
learning_offset : float, default=10.0
A (positive) parameter that downweights early iterations in online
learning. It should be greater than 1.0. In the literature, this is
called tau_0.
max_iter : int, default=10
The maximum number of passes over the training data (aka epochs).
It only impacts the behavior in the :meth:`fit` method, and not the
:meth:`partial_fit` method.
batch_size : int, default=128
Number of documents to use in each EM iteration. Only used in online
learning.
evaluate_every : int, default=-1
How often to evaluate perplexity. Only used in `fit` method.
set it to 0 or negative number to not evaluate perplexity in
training at all. Evaluating perplexity can help you check convergence
in training process, but it will also increase total training time.
Evaluating perplexity in every iteration might increase training time
up to two-fold.
total_samples : int, default=1e6
Total number of documents. Only used in the :meth:`partial_fit` method.
perp_tol : float, default=1e-1
Perplexity tolerance. Only used when ``evaluate_every`` is greater than 0.
mean_change_tol : float, default=1e-3
Stopping tolerance for updating document topic distribution in E-step.
max_doc_update_iter : int, default=100
Max number of iterations for updating document topic distribution in
the E-step.
n_jobs : int, default=None
The number of jobs to use in the E-step.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.
verbose : int, default=0
Verbosity level.
random_state : int, RandomState instance or None, default=None
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
Attributes
----------
components_ : ndarray of shape (n_components, n_features)
Variational parameters for topic word distribution. Since the complete
conditional for topic word distribution is a Dirichlet,
``components_[i, j]`` can be viewed as pseudocount that represents the
number of times word `j` was assigned to topic `i`.
It can also be viewed as distribution over the words for each topic
after normalization:
``model.components_ / model.components_.sum(axis=1)[:, np.newaxis]``.
exp_dirichlet_component_ : ndarray of shape (n_components, n_features)
Exponential value of expectation of log topic word distribution.
In the literature, this is `exp(E[log(beta)])`.
n_batch_iter_ : int
Number of iterations of the EM step.
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
n_iter_ : int
Number of passes over the dataset.
bound_ : float
Final perplexity score on training set.
doc_topic_prior_ : float
Prior of document topic distribution `theta`. If the value is None,
it is `1 / n_components`.
random_state_ : RandomState instance
RandomState instance that is generated either from a seed, the random
number generator or by `np.random`.
topic_word_prior_ : float
Prior of topic word distribution `beta`. If the value is None, it is
`1 / n_components`.
See Also
--------
sklearn.discriminant_analysis.LinearDiscriminantAnalysis:
A classifier with a linear decision boundary, generated by fitting
class conditional densities to the data and using Bayes' rule.
References
----------
.. [1] "Online Learning for Latent Dirichlet Allocation", Matthew D.
Hoffman, David M. Blei, Francis Bach, 2010
https://github.com/blei-lab/onlineldavb
.. [2] "Stochastic Variational Inference", Matthew D. Hoffman,
David M. Blei, Chong Wang, John Paisley, 2013
Examples
--------
>>> from sklearn.decomposition import LatentDirichletAllocation
>>> from sklearn.datasets import make_multilabel_classification
>>> # This produces a feature matrix of token counts, similar to what
>>> # CountVectorizer would produce on text.
>>> X, _ = make_multilabel_classification(random_state=0)
>>> lda = LatentDirichletAllocation(n_components=5,
... random_state=0)
>>> lda.fit(X)
LatentDirichletAllocation(...)
>>> # get topics for some given samples:
>>> lda.transform(X[-2:])
array([[0.00360392, 0.25499205, 0.0036211 , 0.64236448, 0.09541846],
[0.15297572, 0.00362644, 0.44412786, 0.39568399, 0.003586 ]])
"""
# Declarative validation table consumed by sklearn's parameter-validation
# machinery (via `@_fit_context`): each hyper-parameter maps to the set of
# accepted types/intervals/options it is checked against at fit time.
_parameter_constraints: dict = {
    "n_components": [Interval(Integral, 0, None, closed="neither")],
    "doc_topic_prior": [None, Interval(Real, 0, 1, closed="both")],
    "topic_word_prior": [None, Interval(Real, 0, 1, closed="both")],
    "learning_method": [StrOptions({"batch", "online"})],
    "learning_decay": [Interval(Real, 0, 1, closed="both")],
    "learning_offset": [Interval(Real, 1.0, None, closed="left")],
    "max_iter": [Interval(Integral, 0, None, closed="left")],
    "batch_size": [Interval(Integral, 0, None, closed="neither")],
    "evaluate_every": [Interval(Integral, None, None, closed="neither")],
    "total_samples": [Interval(Real, 0, None, closed="neither")],
    "perp_tol": [Interval(Real, 0, None, closed="left")],
    "mean_change_tol": [Interval(Real, 0, None, closed="left")],
    "max_doc_update_iter": [Interval(Integral, 0, None, closed="left")],
    "n_jobs": [None, Integral],
    "verbose": ["verbose"],
    "random_state": ["random_state"],
}
def __init__(
    self,
    n_components=10,
    *,
    doc_topic_prior=None,
    topic_word_prior=None,
    learning_method="batch",
    learning_decay=0.7,
    learning_offset=10.0,
    max_iter=10,
    batch_size=128,
    evaluate_every=-1,
    total_samples=1e6,
    perp_tol=1e-1,
    mean_change_tol=1e-3,
    max_doc_update_iter=100,
    n_jobs=None,
    verbose=0,
    random_state=None,
):
    # Per scikit-learn convention, __init__ only stores the hyper-parameters
    # verbatim; all validation and computation is deferred to fit/partial_fit.
    self.n_components = n_components
    self.doc_topic_prior = doc_topic_prior
    self.topic_word_prior = topic_word_prior
    self.learning_method = learning_method
    self.learning_decay = learning_decay
    self.learning_offset = learning_offset
    self.max_iter = max_iter
    self.batch_size = batch_size
    self.evaluate_every = evaluate_every
    self.total_samples = total_samples
    self.perp_tol = perp_tol
    self.mean_change_tol = mean_change_tol
    self.max_doc_update_iter = max_doc_update_iter
    self.n_jobs = n_jobs
    self.verbose = verbose
    self.random_state = random_state
def _init_latent_vars(self, n_features, dtype=np.float64):
    """Initialize the variational parameters before the first EM update.

    Sets the fitted priors, iteration counters, the topic-word variational
    parameter ``components_`` (lambda in the literature) and its cached
    exponentiated Dirichlet expectation.
    """
    self.random_state_ = check_random_state(self.random_state)
    self.n_batch_iter_ = 1
    self.n_iter_ = 0

    # Symmetric 1/K prior is the default when the user supplies None.
    symmetric_prior = 1.0 / self.n_components
    self.doc_topic_prior_ = (
        symmetric_prior if self.doc_topic_prior is None else self.doc_topic_prior
    )
    self.topic_word_prior_ = (
        symmetric_prior if self.topic_word_prior is None else self.topic_word_prior
    )

    # Gamma(100, 1/100) draws cluster tightly around 1.0, giving a gentle
    # random initialization. In the literature, this is called `lambda`.
    init_gamma = 100.0
    self.components_ = self.random_state_.gamma(
        init_gamma, 1.0 / init_gamma, (self.n_components, n_features)
    ).astype(dtype, copy=False)

    # Cache exp(E[log(beta)]) for the E-step.
    self.exp_dirichlet_component_ = np.exp(
        _dirichlet_expectation_2d(self.components_)
    )
def _e_step(self, X, cal_sstats, random_init, parallel=None):
    """E-step in EM update.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    cal_sstats : bool
        Parameter that indicate whether to calculate sufficient statistics
        or not. Set ``cal_sstats`` to True when we need to run M-step.

    random_init : bool
        Parameter that indicate whether to initialize document topic
        distribution randomly in the E-step. Set it to True in training
        steps.

    parallel : joblib.Parallel, default=None
        Pre-initialized instance of joblib.Parallel.

    Returns
    -------
    (doc_topic_distr, suff_stats) :
        `doc_topic_distr` is unnormalized topic distribution for each
        document. In the literature, this is called `gamma`.
        `suff_stats` is expected sufficient statistics for the M-step.
        When `cal_sstats == False`, it will be None.
    """
    # Run e-step in parallel
    random_state = self.random_state_ if random_init else None

    # TODO: make Parallel._effective_n_jobs public instead?
    n_jobs = effective_n_jobs(self.n_jobs)
    if parallel is None:
        parallel = Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1))
    # Fan the documents out over n_jobs contiguous row slices.
    results = parallel(
        delayed(_update_doc_distribution)(
            X[idx_slice, :],
            self.exp_dirichlet_component_,
            self.doc_topic_prior_,
            self.max_doc_update_iter,
            self.mean_change_tol,
            cal_sstats,
            random_state,
        )
        for idx_slice in gen_even_slices(X.shape[0], n_jobs)
    )

    # merge result
    doc_topics, sstats_list = zip(*results)
    doc_topic_distr = np.vstack(doc_topics)

    if cal_sstats:
        # This step finishes computing the sufficient statistics for the
        # M-step.
        suff_stats = np.zeros(self.components_.shape, dtype=self.components_.dtype)
        for sstats in sstats_list:
            suff_stats += sstats
        suff_stats *= self.exp_dirichlet_component_
    else:
        suff_stats = None

    return (doc_topic_distr, suff_stats)
def _em_step(self, X, total_samples, batch_update, parallel=None):
    """EM update for 1 iteration.

    update `component_` by batch VB or online VB.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    total_samples : int
        Total number of documents. It is only used when
        batch_update is `False`.

    batch_update : bool
        Parameter that controls updating method.
        `True` for batch learning, `False` for online learning.

    parallel : joblib.Parallel, default=None
        Pre-initialized instance of joblib.Parallel

    Returns
    -------
    doc_topic_distr : ndarray of shape (n_samples, n_components)
        Unnormalized document topic distribution.
    """
    # E-step
    _, suff_stats = self._e_step(
        X, cal_sstats=True, random_init=True, parallel=parallel
    )

    # M-step
    if batch_update:
        self.components_ = self.topic_word_prior_ + suff_stats
    else:
        # online update
        # In the literature, the weight is `rho`
        weight = np.power(
            self.learning_offset + self.n_batch_iter_, -self.learning_decay
        )
        # Scale the mini-batch statistics up to the full corpus size.
        doc_ratio = float(total_samples) / X.shape[0]
        # Convex combination of old and new estimates, done in place:
        # components_ = (1 - rho) * components_ + rho * (prior + scaled stats)
        self.components_ *= 1 - weight
        self.components_ += weight * (
            self.topic_word_prior_ + doc_ratio * suff_stats
        )

    # update `component_` related variables
    self.exp_dirichlet_component_ = np.exp(
        _dirichlet_expectation_2d(self.components_)
    )
    self.n_batch_iter_ += 1
    return
def __sklearn_tags__(self):
    """Declare estimator capabilities for scikit-learn's tag system."""
    tags = super().__sklearn_tags__()
    # LDA is a count model: inputs must be non-negative; csr sparse is fine.
    tags.input_tags.sparse = True
    tags.input_tags.positive_only = True
    tags.transformer_tags.preserves_dtype = ["float32", "float64"]
    return tags
def _check_non_neg_array(self, X, reset_n_features, whom):
    """Validate X and ensure it contains no negative values.

    Parameters
    ----------
    X : array-like or sparse matrix
        Document word matrix to validate.

    reset_n_features : bool
        True on the first fit (accept either float precision and record
        ``n_features_in_``); False afterwards (X must match the dtype of
        the fitted ``components_``).

    whom : str
        Caller name used in the non-negativity error message.
    """
    if reset_n_features:
        dtype = [np.float64, np.float32]
    else:
        dtype = self.components_.dtype
    X = validate_data(
        self,
        X,
        reset=reset_n_features,
        accept_sparse="csr",
        dtype=dtype,
    )
    check_non_negative(X, whom)
    return X
@_fit_context(prefer_skip_nested_validation=True)
def partial_fit(self, X, y=None):
    """Online VB with Mini-Batch update.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self
        Partially fitted estimator.
    """
    # First call initializes state; later calls must match the fitted shape.
    first_time = not hasattr(self, "components_")

    X = self._check_non_neg_array(
        X, reset_n_features=first_time, whom="LatentDirichletAllocation.partial_fit"
    )
    n_samples, n_features = X.shape
    batch_size = self.batch_size

    # initialize parameters or check
    if first_time:
        self._init_latent_vars(n_features, dtype=X.dtype)

    if n_features != self.components_.shape[1]:
        raise ValueError(
            "The provided data has %d dimensions while "
            "the model was trained with feature size %d."
            % (n_features, self.components_.shape[1])
        )

    n_jobs = effective_n_jobs(self.n_jobs)
    # One online EM step (batch_update=False) per mini-batch, sharing one
    # Parallel pool to avoid worker respawn overhead.
    with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:
        for idx_slice in gen_batches(n_samples, batch_size):
            self._em_step(
                X[idx_slice, :],
                total_samples=self.total_samples,
                batch_update=False,
                parallel=parallel,
            )

    return self
@_fit_context(prefer_skip_nested_validation=True)
def fit(self, X, y=None):
    """Learn model for the data X with variational Bayes method.

    When `learning_method` is 'online', use mini-batch update.
    Otherwise, use batch update.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    y : Ignored
        Not used, present here for API consistency by convention.

    Returns
    -------
    self
        Fitted estimator.
    """
    X = self._check_non_neg_array(
        X, reset_n_features=True, whom="LatentDirichletAllocation.fit"
    )
    n_samples, n_features = X.shape
    max_iter = self.max_iter
    evaluate_every = self.evaluate_every
    learning_method = self.learning_method

    batch_size = self.batch_size

    # initialize parameters
    self._init_latent_vars(n_features, dtype=X.dtype)
    # change to perplexity later
    last_bound = None
    n_jobs = effective_n_jobs(self.n_jobs)
    # Share one Parallel pool across all EM iterations to avoid respawn cost.
    with Parallel(n_jobs=n_jobs, verbose=max(0, self.verbose - 1)) as parallel:
        for i in range(max_iter):
            if learning_method == "online":
                # One online EM step per mini-batch of documents.
                for idx_slice in gen_batches(n_samples, batch_size):
                    self._em_step(
                        X[idx_slice, :],
                        total_samples=n_samples,
                        batch_update=False,
                        parallel=parallel,
                    )
            else:
                # batch update
                self._em_step(
                    X, total_samples=n_samples, batch_update=True, parallel=parallel
                )

            # check perplexity
            if evaluate_every > 0 and (i + 1) % evaluate_every == 0:
                doc_topics_distr, _ = self._e_step(
                    X, cal_sstats=False, random_init=False, parallel=parallel
                )
                bound = self._perplexity_precomp_distr(
                    X, doc_topics_distr, sub_sampling=False
                )
                if self.verbose:
                    print(
                        "iteration: %d of max_iter: %d, perplexity: %.4f"
                        % (i + 1, max_iter, bound)
                    )

                # Early stopping when perplexity change drops below perp_tol.
                if last_bound and abs(last_bound - bound) < self.perp_tol:
                    break
                last_bound = bound

            elif self.verbose:
                print("iteration: %d of max_iter: %d" % (i + 1, max_iter))
            self.n_iter_ += 1

        # calculate final perplexity value on train set
        doc_topics_distr, _ = self._e_step(
            X, cal_sstats=False, random_init=False, parallel=parallel
        )
        self.bound_ = self._perplexity_precomp_distr(
            X, doc_topics_distr, sub_sampling=False
        )

    return self
def _unnormalized_transform(self, X):
    """Infer the unnormalized document-topic distribution (gamma) for X.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    Returns
    -------
    doc_topic_distr : ndarray of shape (n_samples, n_components)
        Document topic distribution for X.
    """
    # Inference only: no sufficient statistics, deterministic init.
    distribution, _ = self._e_step(X, cal_sstats=False, random_init=False)
    return distribution
def transform(self, X, *, normalize=True):
    """Transform data X according to the fitted model.

    .. versionchanged:: 0.18
       `doc_topic_distr` is now normalized.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    normalize : bool, default=True
        Whether to normalize the document topic distribution.

    Returns
    -------
    doc_topic_distr : ndarray of shape (n_samples, n_components)
        Document topic distribution for X.
    """
    check_is_fitted(self)
    X = self._check_non_neg_array(
        X, reset_n_features=False, whom="LatentDirichletAllocation.transform"
    )
    gamma = self._unnormalized_transform(X)
    if normalize:
        # Turn the topic pseudo-counts into per-document probabilities.
        gamma /= gamma.sum(axis=1)[:, np.newaxis]
    return gamma
def fit_transform(self, X, y=None, *, normalize=True):
    """
    Fit to data, then transform it.

    Fits transformer to `X` and `y` and returns a transformed version of `X`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input samples.

    y : array-like of shape (n_samples,) or (n_samples, n_outputs), \
            default=None
        Target values (None for unsupervised transformations).

    normalize : bool, default=True
        Whether to normalize the document topic distribution in `transform`.

    Returns
    -------
    X_new : ndarray array of shape (n_samples, n_components)
        Transformed array.
    """
    # Fit first, then run a fresh inference pass over the same data.
    fitted = self.fit(X, y)
    return fitted.transform(X, normalize=normalize)
def _approx_bound(self, X, doc_topic_distr, sub_sampling):
    """Estimate the variational bound.

    Estimate the variational bound over "all documents" using only the
    documents passed in as X. Since log-likelihood of each word cannot
    be computed directly, we use this bound to estimate it.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        Document word matrix.

    doc_topic_distr : ndarray of shape (n_samples, n_components)
        Document topic distribution. In the literature, this is called
        gamma.

    sub_sampling : bool, default=False
        Compensate for subsampling of documents.
        It is used in calculate bound in online learning.

    Returns
    -------
    score : float
    """

    def _loglikelihood(prior, distr, dirichlet_distr, size):
        # calculate log-likelihood of a Dirichlet prior minus the
        # variational posterior entropy term
        score = np.sum((prior - distr) * dirichlet_distr)
        score += np.sum(gammaln(distr) - gammaln(prior))
        score += np.sum(gammaln(prior * size) - gammaln(np.sum(distr, 1)))
        return score

    is_sparse_x = sp.issparse(X)
    n_samples, n_components = doc_topic_distr.shape
    n_features = self.components_.shape[1]
    score = 0

    dirichlet_doc_topic = _dirichlet_expectation_2d(doc_topic_distr)
    dirichlet_component_ = _dirichlet_expectation_2d(self.components_)
    doc_topic_prior = self.doc_topic_prior_
    topic_word_prior = self.topic_word_prior_

    if is_sparse_x:
        X_data = X.data
        X_indices = X.indices
        X_indptr = X.indptr

    # E[log p(docs | theta, beta)]
    for idx_d in range(0, n_samples):
        if is_sparse_x:
            ids = X_indices[X_indptr[idx_d] : X_indptr[idx_d + 1]]
            cnts = X_data[X_indptr[idx_d] : X_indptr[idx_d + 1]]
        else:
            ids = np.nonzero(X[idx_d, :])[0]
            cnts = X[idx_d, ids]
        # logsumexp over topics gives a numerically stable log of the
        # per-word normalizer phi.
        temp = (
            dirichlet_doc_topic[idx_d, :, np.newaxis] + dirichlet_component_[:, ids]
        )
        norm_phi = logsumexp(temp, axis=0)
        score += np.dot(cnts, norm_phi)

    # compute E[log p(theta | alpha) - log q(theta | gamma)]
    score += _loglikelihood(
        doc_topic_prior, doc_topic_distr, dirichlet_doc_topic, self.n_components
    )

    # Compensate for the subsampling of the population of documents
    if sub_sampling:
        doc_ratio = float(self.total_samples) / n_samples
        score *= doc_ratio

    # E[log p(beta | eta) - log q (beta | lambda)]
    score += _loglikelihood(
        topic_word_prior, self.components_, dirichlet_component_, n_features
    )

    return score
def score(self, X, y=None):
"""Calculate approximate log-likelihood as score.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Document word matrix.
y : Ignored
Not used, present here for API consistency by convention.
Returns
-------
score : float
Use approximate bound as score.
"""
check_is_fitted(self)
X = self._check_non_neg_array(
X, reset_n_features=False, whom="LatentDirichletAllocation.score"
)
doc_topic_distr = self._unnormalized_transform(X)
score = self._approx_bound(X, doc_topic_distr, sub_sampling=False)
return score
def _perplexity_precomp_distr(self, X, doc_topic_distr=None, sub_sampling=False):
"""Calculate approximate perplexity for data X with ability to accept
precomputed doc_topic_distr
Perplexity is defined as exp(-1. * log-likelihood per word)
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Document word matrix.
doc_topic_distr : ndarray of shape (n_samples, n_components), \
default=None
Document topic distribution.
If it is None, it will be generated by applying transform on X.
Returns
-------
score : float
Perplexity score.
"""
if doc_topic_distr is None:
doc_topic_distr = self._unnormalized_transform(X)
else:
n_samples, n_components = doc_topic_distr.shape
if n_samples != X.shape[0]:
raise ValueError(
"Number of samples in X and doc_topic_distr do not match."
)
if n_components != self.n_components:
raise ValueError("Number of topics does not match.")
current_samples = X.shape[0]
bound = self._approx_bound(X, doc_topic_distr, sub_sampling)
if sub_sampling:
word_cnt = X.sum() * (float(self.total_samples) / current_samples)
else:
word_cnt = X.sum()
perword_bound = bound / word_cnt
return np.exp(-1.0 * perword_bound)
def perplexity(self, X, sub_sampling=False):
"""Calculate approximate perplexity for data X.
Perplexity is defined as exp(-1. * log-likelihood per word)
.. versionchanged:: 0.19
*doc_topic_distr* argument has been deprecated and is ignored
because user no longer has access to unnormalized distribution
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Document word matrix.
sub_sampling : bool
Do sub-sampling or not.
Returns
-------
score : float
Perplexity score.
"""
check_is_fitted(self)
X = self._check_non_neg_array(
X, reset_n_features=True, whom="LatentDirichletAllocation.perplexity"
)
return self._perplexity_precomp_distr(X, sub_sampling=sub_sampling)
@property
def _n_features_out(self):
"""Number of transformed output features."""
return self.components_.shape[0]

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,110 @@
import numpy as np
from cython cimport floating
from libc.math cimport exp, fabs, log
from ..utils._typedefs cimport float64_t, intp_t
def mean_change(const floating[:] arr_1, const floating[:] arr_2):
    """Calculate the mean absolute difference between two 1-D arrays.

    Equivalent to np.abs(arr_1 - arr_2).mean().
    """
    cdef float64_t acc
    cdef intp_t idx, n
    n = arr_1.shape[0]
    acc = 0.0
    for idx in range(n):
        acc += fabs(arr_1[idx] - arr_2[idx])
    return acc / n
def _dirichlet_expectation_1d(
    floating[:] doc_topic,
    floating doc_topic_prior,
    floating[:] out
):
    """Dirichlet expectation for a single sample:
        exp(E[log(theta)]) for theta ~ Dir(doc_topic)
    after adding doc_topic_prior to doc_topic, in-place.

    Equivalent to
        doc_topic += doc_topic_prior
        out[:] = np.exp(psi(doc_topic) - psi(np.sum(doc_topic)))
    """
    cdef floating shifted, psi_sum, acc
    cdef intp_t k, n
    n = doc_topic.shape[0]
    # First pass: shift by the prior (stored back in-place) and accumulate
    # the total needed for the normalizing psi term.
    acc = 0.0
    for k in range(n):
        shifted = doc_topic[k] + doc_topic_prior
        doc_topic[k] = shifted
        acc += shifted
    psi_sum = psi(acc)
    # Second pass: exponentiated expectation of log(theta).
    for k in range(n):
        out[k] = exp(psi(doc_topic[k]) - psi_sum)
def _dirichlet_expectation_2d(const floating[:, :] arr):
    """Dirichlet expectation for multiple samples:
    E[log(theta)] for theta ~ Dir(arr).

    Equivalent to psi(arr) - psi(np.sum(arr, axis=1))[:, np.newaxis].

    Note that unlike _dirichlet_expectation_1d, this function doesn't compute
    the exp and doesn't add in the prior.
    """
    cdef floating acc, psi_acc
    cdef floating[:, :] expectation
    cdef intp_t r, c, n_r, n_c
    n_r = arr.shape[0]
    n_c = arr.shape[1]
    expectation = np.empty_like(arr)
    for r in range(n_r):
        # Row total feeds the normalizing psi term for this sample.
        acc = 0
        for c in range(n_c):
            acc += arr[r, c]
        psi_acc = psi(acc)
        for c in range(n_c):
            expectation[r, c] = psi(arr[r, c]) - psi_acc
    # Return the ndarray backing the memoryview.
    return expectation.base
# Psi function for positive arguments. Optimized for speed, not accuracy.
#
# After: J. Bernardo (1976). Algorithm AS 103: Psi (Digamma) Function.
# https://www.uv.es/~bernardo/1976AppStatist.pdf
#
# Strategy: for tiny x use the leading-order expansion around 0; otherwise
# shift x upward via the recurrence until it is large enough (>= 6) for the
# asymptotic series to be accurate, then evaluate that series.
cdef floating psi(floating x) noexcept nogil:
    # Euler-Mascheroni constant.
    cdef double EULER = 0.577215664901532860606512090082402431
    if x <= 1e-6:
        # psi(x) = -EULER - 1/x + O(x)
        return -EULER - 1. / x
    cdef floating r, result = 0
    # psi(x + 1) = psi(x) + 1/x
    while x < 6:
        result -= 1. / x
        x += 1
    # psi(x) = log(x) - 1/(2x) - 1/(12x**2) + 1/(120x**4) - 1/(252x**6)
    #          + O(1/x**8)
    # Evaluated in Horner form in r = 1/x**2 for the polynomial part.
    r = 1. / x
    result += log(x) - .5 * r
    r = r * r
    result -= r * ((1./12.) - r * ((1./120.) - r * (1./252.)))
    return result

View File

@@ -0,0 +1,857 @@
"""Principal Component Analysis."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from math import lgamma, log, sqrt
from numbers import Integral, Real
import numpy as np
from scipy import linalg
from scipy.sparse import issparse
from scipy.sparse.linalg import svds
from ..base import _fit_context
from ..utils import check_random_state
from ..utils._arpack import _init_arpack_v0
from ..utils._array_api import _convert_to_numpy, get_namespace
from ..utils._param_validation import Interval, RealNotInt, StrOptions
from ..utils.extmath import _randomized_svd, fast_logdet, stable_cumsum, svd_flip
from ..utils.sparsefuncs import _implicit_column_offset, mean_variance_axis
from ..utils.validation import check_is_fitted, validate_data
from ._base import _BasePCA
def _assess_dimension(spectrum, rank, n_samples):
    """Compute the log-likelihood of a rank ``rank`` dataset.

    The dataset is assumed to be embedded in gaussian noise of shape(n,
    dimf) having spectrum ``spectrum``. This implements the method of
    T. P. Minka.

    Parameters
    ----------
    spectrum : ndarray of shape (n_features,)
        Data spectrum.
    rank : int
        Tested rank value. It should be strictly lower than n_features,
        otherwise the method isn't specified (division by zero in equation
        (31) from the paper).
    n_samples : int
        Number of samples.

    Returns
    -------
    ll : float
        The log-likelihood.

    References
    ----------
    This implements the method of `Thomas P. Minka:
    Automatic Choice of Dimensionality for PCA. NIPS 2000: 598-604
    <https://proceedings.neurips.cc/paper/2000/file/7503cfacd12053d309b6bed5c89de212-Paper.pdf>`_
    """
    xp, _ = get_namespace(spectrum)
    n_features = spectrum.shape[0]
    if not 1 <= rank < n_features:
        raise ValueError("the tested rank should be in [1, n_features - 1]")
    eps = 1e-15
    if spectrum[rank - 1] < eps:
        # When the tested rank is associated with a small eigenvalue, there's
        # no point in computing the log-likelihood: it's going to be very
        # small and won't be the max anyway. Also, it can lead to numerical
        # issues below when computing pa, in particular in log((spectrum[i] -
        # spectrum[j]) because this will take the log of something very small.
        return -xp.inf
    # The terms below follow Minka's derivation (names match the paper's
    # decomposition of the Laplace-approximated evidence).
    # pu: log-prior normalization over the subspace orientation.
    pu = -rank * log(2.0)
    for i in range(1, rank + 1):
        pu += (
            lgamma((n_features - i + 1) / 2.0) - log(xp.pi) * (n_features - i + 1) / 2.0
        )
    # pl: likelihood term from the `rank` retained eigenvalues.
    pl = xp.sum(xp.log(spectrum[:rank]))
    pl = -pl * n_samples / 2.0
    # v: maximum-likelihood estimate of the discarded (noise) variance,
    # clipped to eps to keep log(v) finite.
    v = max(eps, xp.sum(spectrum[rank:]) / (n_features - rank))
    pv = -log(v) * n_samples * (n_features - rank) / 2.0
    # m: number of free parameters of the rank-`rank` model.
    m = n_features * rank - rank * (rank + 1.0) / 2.0
    pp = log(2.0 * xp.pi) * (m + rank) / 2.0
    # pa: log-determinant of the Hessian in the Laplace approximation,
    # accumulated over eigenvalue pairs. spectrum_ is the spectrum with
    # discarded eigenvalues replaced by the noise estimate v.
    pa = 0.0
    spectrum_ = xp.asarray(spectrum, copy=True)
    spectrum_[rank:n_features] = v
    for i in range(rank):
        for j in range(i + 1, spectrum.shape[0]):
            pa += log(
                (spectrum[i] - spectrum[j]) * (1.0 / spectrum_[j] - 1.0 / spectrum_[i])
            ) + log(n_samples)
    ll = pu + pl + pv + pp - pa / 2.0 - rank * log(n_samples) / 2.0
    return ll
def _infer_dimension(spectrum, n_samples):
    """Infer the dimension of a dataset with the given spectrum.

    The returned value will be in [1, n_features - 1].
    """
    xp, _ = get_namespace(spectrum)
    log_likelihoods = xp.empty_like(spectrum)
    # Rank 0 must never win the argmax: we don't want n_components = 0.
    log_likelihoods[0] = -xp.inf
    for tested_rank in range(1, spectrum.shape[0]):
        log_likelihoods[tested_rank] = _assess_dimension(
            spectrum, tested_rank, n_samples
        )
    return xp.argmax(log_likelihoods)
class PCA(_BasePCA):
    """Principal component analysis (PCA).
    Linear dimensionality reduction using Singular Value Decomposition of the
    data to project it to a lower dimensional space. The input data is centered
    but not scaled for each feature before applying the SVD.
    It uses the LAPACK implementation of the full SVD or a randomized truncated
    SVD by the method of Halko et al. 2009, depending on the shape of the input
    data and the number of components to extract.
    With sparse inputs, the ARPACK implementation of the truncated SVD can be
    used (i.e. through :func:`scipy.sparse.linalg.svds`). Alternatively, one
    may consider :class:`TruncatedSVD` where the data are not centered.
    Notice that this class only supports sparse inputs for some solvers such as
    "arpack" and "covariance_eigh". See :class:`TruncatedSVD` for an
    alternative with sparse data.
    For a usage example, see
    :ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`
    Read more in the :ref:`User Guide <PCA>`.
    Parameters
    ----------
    n_components : int, float or 'mle', default=None
        Number of components to keep.
        if n_components is not set all components are kept::
            n_components == min(n_samples, n_features)
        If ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's
        MLE is used to guess the dimension. Use of ``n_components == 'mle'``
        will interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.
        If ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the
        number of components such that the amount of variance that needs to be
        explained is greater than the percentage specified by n_components.
        If ``svd_solver == 'arpack'``, the number of components must be
        strictly less than the minimum of n_features and n_samples.
        Hence, the None case results in::
            n_components == min(n_samples, n_features) - 1
    copy : bool, default=True
        If False, data passed to fit are overwritten and running
        fit(X).transform(X) will not yield the expected results,
        use fit_transform(X) instead.
    whiten : bool, default=False
        When True (False by default) the `components_` vectors are multiplied
        by the square root of n_samples and then divided by the singular values
        to ensure uncorrelated outputs with unit component-wise variances.
        Whitening will remove some information from the transformed signal
        (the relative variance scales of the components) but can sometime
        improve the predictive accuracy of the downstream estimators by
        making their data respect some hard-wired assumptions.
    svd_solver : {'auto', 'full', 'covariance_eigh', 'arpack', 'randomized'},\
            default='auto'
        "auto" :
            The solver is selected by a default 'auto' policy is based on `X.shape` and
            `n_components`: if the input data has fewer than 1000 features and
            more than 10 times as many samples, then the "covariance_eigh"
            solver is used. Otherwise, if the input data is larger than 500x500
            and the number of components to extract is lower than 80% of the
            smallest dimension of the data, then the more efficient
            "randomized" method is selected. Otherwise the exact "full" SVD is
            computed and optionally truncated afterwards.
        "full" :
            Run exact full SVD calling the standard LAPACK solver via
            `scipy.linalg.svd` and select the components by postprocessing
        "covariance_eigh" :
            Precompute the covariance matrix (on centered data), run a
            classical eigenvalue decomposition on the covariance matrix
            typically using LAPACK and select the components by postprocessing.
            This solver is very efficient for n_samples >> n_features and small
            n_features. It is, however, not tractable otherwise for large
            n_features (large memory footprint required to materialize the
            covariance matrix). Also note that compared to the "full" solver,
            this solver effectively doubles the condition number and is
            therefore less numerical stable (e.g. on input data with a large
            range of singular values).
        "arpack" :
            Run SVD truncated to `n_components` calling ARPACK solver via
            `scipy.sparse.linalg.svds`. It requires strictly
            `0 < n_components < min(X.shape)`
        "randomized" :
            Run randomized SVD by the method of Halko et al.
        .. versionadded:: 0.18.0
        .. versionchanged:: 1.5
            Added the 'covariance_eigh' solver.
    tol : float, default=0.0
        Tolerance for singular values computed by svd_solver == 'arpack'.
        Must be of range [0.0, infinity).
        .. versionadded:: 0.18.0
    iterated_power : int or 'auto', default='auto'
        Number of iterations for the power method computed by
        svd_solver == 'randomized'.
        Must be of range [0, infinity).
        .. versionadded:: 0.18.0
    n_oversamples : int, default=10
        This parameter is only relevant when `svd_solver="randomized"`.
        It corresponds to the additional number of random vectors to sample the
        range of `X` so as to ensure proper conditioning. See
        :func:`~sklearn.utils.extmath.randomized_svd` for more details.
        .. versionadded:: 1.1
    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Power iteration normalizer for randomized SVD solver.
        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`
        for more details.
        .. versionadded:: 1.1
    random_state : int, RandomState instance or None, default=None
        Used when the 'arpack' or 'randomized' solvers are used. Pass an int
        for reproducible results across multiple function calls.
        See :term:`Glossary <random_state>`.
        .. versionadded:: 0.18.0
    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Principal axes in feature space, representing the directions of
        maximum variance in the data. Equivalently, the right singular
        vectors of the centered input data, parallel to its eigenvectors.
        The components are sorted by decreasing ``explained_variance_``.
    explained_variance_ : ndarray of shape (n_components,)
        The amount of variance explained by each of the selected components.
        The variance estimation uses `n_samples - 1` degrees of freedom.
        Equal to n_components largest eigenvalues
        of the covariance matrix of X.
        .. versionadded:: 0.18
    explained_variance_ratio_ : ndarray of shape (n_components,)
        Percentage of variance explained by each of the selected components.
        If ``n_components`` is not set then all components are stored and the
        sum of the ratios is equal to 1.0.
    singular_values_ : ndarray of shape (n_components,)
        The singular values corresponding to each of the selected components.
        The singular values are equal to the 2-norms of the ``n_components``
        variables in the lower-dimensional space.
        .. versionadded:: 0.19
    mean_ : ndarray of shape (n_features,)
        Per-feature empirical mean, estimated from the training set.
        Equal to `X.mean(axis=0)`.
    n_components_ : int
        The estimated number of components. When n_components is set
        to 'mle' or a number between 0 and 1 (with svd_solver == 'full') this
        number is estimated from input data. Otherwise it equals the parameter
        n_components, or the lesser value of n_features and n_samples
        if n_components is None.
    n_samples_ : int
        Number of samples in the training data.
    noise_variance_ : float
        The estimated noise covariance following the Probabilistic PCA model
        from Tipping and Bishop 1999. See "Pattern Recognition and
        Machine Learning" by C. Bishop, 12.2.1 p. 574 or
        http://www.miketipping.com/papers/met-mppca.pdf. It is required to
        compute the estimated data covariance and score samples.
        Equal to the average of (min(n_features, n_samples) - n_components)
        smallest eigenvalues of the covariance matrix of X.
    n_features_in_ : int
        Number of features seen during :term:`fit`.
        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.
        .. versionadded:: 1.0
    See Also
    --------
    KernelPCA : Kernel Principal Component Analysis.
    SparsePCA : Sparse Principal Component Analysis.
    TruncatedSVD : Dimensionality reduction using truncated SVD.
    IncrementalPCA : Incremental Principal Component Analysis.
    References
    ----------
    For n_components == 'mle', this class uses the method from:
    `Minka, T. P.. "Automatic choice of dimensionality for PCA".
    In NIPS, pp. 598-604 <https://tminka.github.io/papers/pca/minka-pca.pdf>`_
    Implements the probabilistic PCA model from:
    `Tipping, M. E., and Bishop, C. M. (1999). "Probabilistic principal
    component analysis". Journal of the Royal Statistical Society:
    Series B (Statistical Methodology), 61(3), 611-622.
    <http://www.miketipping.com/papers/met-mppca.pdf>`_
    via the score and score_samples methods.
    For svd_solver == 'arpack', refer to `scipy.sparse.linalg.svds`.
    For svd_solver == 'randomized', see:
    :doi:`Halko, N., Martinsson, P. G., and Tropp, J. A. (2011).
    "Finding structure with randomness: Probabilistic algorithms for
    constructing approximate matrix decompositions".
    SIAM review, 53(2), 217-288.
    <10.1137/090771806>`
    and also
    :doi:`Martinsson, P. G., Rokhlin, V., and Tygert, M. (2011).
    "A randomized algorithm for the decomposition of matrices".
    Applied and Computational Harmonic Analysis, 30(1), 47-68.
    <10.1016/j.acha.2010.02.003>`
    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.decomposition import PCA
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> pca = PCA(n_components=2)
    >>> pca.fit(X)
    PCA(n_components=2)
    >>> print(pca.explained_variance_ratio_)
    [0.9924 0.0075]
    >>> print(pca.singular_values_)
    [6.30061 0.54980]
    >>> pca = PCA(n_components=2, svd_solver='full')
    >>> pca.fit(X)
    PCA(n_components=2, svd_solver='full')
    >>> print(pca.explained_variance_ratio_)
    [0.9924 0.00755]
    >>> print(pca.singular_values_)
    [6.30061 0.54980]
    >>> pca = PCA(n_components=1, svd_solver='arpack')
    >>> pca.fit(X)
    PCA(n_components=1, svd_solver='arpack')
    >>> print(pca.explained_variance_ratio_)
    [0.99244]
    >>> print(pca.singular_values_)
    [6.30061]
    """
    _parameter_constraints: dict = {
        "n_components": [
            Interval(Integral, 0, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="neither"),
            StrOptions({"mle"}),
            None,
        ],
        "copy": ["boolean"],
        "whiten": ["boolean"],
        "svd_solver": [
            StrOptions({"auto", "full", "covariance_eigh", "arpack", "randomized"})
        ],
        "tol": [Interval(Real, 0, None, closed="left")],
        "iterated_power": [
            StrOptions({"auto"}),
            Interval(Integral, 0, None, closed="left"),
        ],
        "n_oversamples": [Interval(Integral, 1, None, closed="left")],
        "power_iteration_normalizer": [StrOptions({"auto", "QR", "LU", "none"})],
        "random_state": ["random_state"],
    }
    def __init__(
        self,
        n_components=None,
        *,
        copy=True,
        whiten=False,
        svd_solver="auto",
        tol=0.0,
        iterated_power="auto",
        n_oversamples=10,
        power_iteration_normalizer="auto",
        random_state=None,
    ):
        # Hyper-parameters are stored unmodified; validation and all
        # computation are deferred to `fit` (scikit-learn convention).
        self.n_components = n_components
        self.copy = copy
        self.whiten = whiten
        self.svd_solver = svd_solver
        self.tol = tol
        self.iterated_power = iterated_power
        self.n_oversamples = n_oversamples
        self.power_iteration_normalizer = power_iteration_normalizer
        self.random_state = random_state
    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model with X.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : Ignored
            Ignored.
        Returns
        -------
        self : object
            Returns the instance itself.
        """
        self._fit(X)
        return self
    @_fit_context(prefer_skip_nested_validation=True)
    def fit_transform(self, X, y=None):
        """Fit the model with X and apply the dimensionality reduction on X.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : Ignored
            Ignored.
        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Transformed values.
        Notes
        -----
        This method returns a Fortran-ordered array. To convert it to a
        C-ordered array, use 'np.ascontiguousarray'.
        """
        U, S, _, X, x_is_centered, xp = self._fit(X)
        if U is not None:
            # SVD-based solvers already produced U at fit time: the
            # projection can be recovered without a second pass over X.
            U = U[:, : self.n_components_]
            if self.whiten:
                # X_new = X * V / S * sqrt(n_samples) = U * sqrt(n_samples)
                U *= sqrt(X.shape[0] - 1)
            else:
                # X_new = X * V = U * S * Vt * V = U * S
                U *= S[: self.n_components_]
            return U
        else:  # solver="covariance_eigh" does not compute U at fit time.
            return self._transform(X, xp, x_is_centered=x_is_centered)
    def _fit(self, X):
        """Dispatch to the right submethod depending on the chosen solver."""
        xp, is_array_api_compliant = get_namespace(X)
        # Raise an error for sparse input and unsupported svd_solver
        if issparse(X) and self.svd_solver not in ["auto", "arpack", "covariance_eigh"]:
            raise TypeError(
                'PCA only support sparse inputs with the "arpack" and'
                f' "covariance_eigh" solvers, while "{self.svd_solver}" was passed. See'
                " TruncatedSVD for a possible alternative."
            )
        if self.svd_solver == "arpack" and is_array_api_compliant:
            raise ValueError(
                "PCA with svd_solver='arpack' is not supported for Array API inputs."
            )
        # Validate the data, without ever forcing a copy as any solver that
        # supports sparse input data and the `covariance_eigh` solver are
        # written in a way to avoid the need for any inplace modification of
        # the input data contrary to the other solvers.
        # The copy will happen
        # later, only if needed, once the solver negotiation below is done.
        X = validate_data(
            self,
            X,
            dtype=[xp.float64, xp.float32],
            force_writeable=True,
            accept_sparse=("csr", "csc"),
            ensure_2d=True,
            copy=False,
        )
        # Solver negotiation: resolve "auto" to a concrete solver based on
        # sparsity, the shape of X and the requested number of components.
        self._fit_svd_solver = self.svd_solver
        if self._fit_svd_solver == "auto" and issparse(X):
            self._fit_svd_solver = "arpack"
        if self.n_components is None:
            if self._fit_svd_solver != "arpack":
                n_components = min(X.shape)
            else:
                # arpack requires strictly 0 < n_components < min(X.shape).
                n_components = min(X.shape) - 1
        else:
            n_components = self.n_components
        if self._fit_svd_solver == "auto":
            # Tall and skinny problems are best handled by precomputing the
            # covariance matrix.
            if X.shape[1] <= 1_000 and X.shape[0] >= 10 * X.shape[1]:
                self._fit_svd_solver = "covariance_eigh"
            # Small problem or n_components == 'mle', just call full PCA
            elif max(X.shape) <= 500 or n_components == "mle":
                self._fit_svd_solver = "full"
            elif 1 <= n_components < 0.8 * min(X.shape):
                self._fit_svd_solver = "randomized"
            # This is also the case of n_components in (0, 1)
            else:
                self._fit_svd_solver = "full"
        # Call different fits for either full or truncated SVD
        if self._fit_svd_solver in ("full", "covariance_eigh"):
            return self._fit_full(X, n_components, xp, is_array_api_compliant)
        elif self._fit_svd_solver in ["arpack", "randomized"]:
            return self._fit_truncated(X, n_components, xp)
    def _fit_full(self, X, n_components, xp, is_array_api_compliant):
        """Fit the model by computing full SVD on X."""
        n_samples, n_features = X.shape
        if n_components == "mle":
            if n_samples < n_features:
                raise ValueError(
                    "n_components='mle' is only supported if n_samples >= n_features"
                )
        elif not 0 <= n_components <= min(n_samples, n_features):
            raise ValueError(
                f"n_components={n_components} must be between 0 and "
                f"min(n_samples, n_features)={min(n_samples, n_features)} with "
                f"svd_solver={self._fit_svd_solver!r}"
            )
        self.mean_ = xp.mean(X, axis=0)
        # When X is a scipy sparse matrix, self.mean_ is a numpy matrix, so we need
        # to transform it to a 1D array. Note that this is not the case when X
        # is a scipy sparse array.
        # TODO: remove the following two lines when scikit-learn only depends
        # on scipy versions that no longer support scipy.sparse matrices.
        self.mean_ = xp.reshape(xp.asarray(self.mean_), (-1,))
        if self._fit_svd_solver == "full":
            X_centered = xp.asarray(X, copy=True) if self.copy else X
            X_centered -= self.mean_
            x_is_centered = not self.copy
            if not is_array_api_compliant:
                # Use scipy.linalg with NumPy/SciPy inputs for the sake of not
                # introducing unanticipated behavior changes. In the long run we
                # could instead decide to always use xp.linalg.svd for all inputs,
                # but that would make this code rely on numpy's SVD instead of
                # scipy's. It's not 100% clear whether they use the same LAPACK
                # solver by default though (assuming both are built against the
                # same BLAS).
                U, S, Vt = linalg.svd(X_centered, full_matrices=False)
            else:
                U, S, Vt = xp.linalg.svd(X_centered, full_matrices=False)
            explained_variance_ = (S**2) / (n_samples - 1)
        else:
            assert self._fit_svd_solver == "covariance_eigh"
            # In the following, we center the covariance matrix C afterwards
            # (without centering the data X first) to avoid an unnecessary copy
            # of X. Note that the mean_ attribute is still needed to center
            # test data in the transform method.
            #
            # Note: at the time of writing, `xp.cov` does not exist in the
            # Array API standard:
            # https://github.com/data-apis/array-api/issues/43
            #
            # Besides, using `numpy.cov`, as of numpy 1.26.0, would not be
            # memory efficient for our use case when `n_samples >> n_features`:
            # `numpy.cov` centers a copy of the data before computing the
            # matrix product instead of subtracting a small `(n_features,
            # n_features)` square matrix from the gram matrix X.T @ X, as we do
            # below.
            x_is_centered = False
            C = X.T @ X
            C -= (
                n_samples
                * xp.reshape(self.mean_, (-1, 1))
                * xp.reshape(self.mean_, (1, -1))
            )
            C /= n_samples - 1
            eigenvals, eigenvecs = xp.linalg.eigh(C)
            # When X is a scipy sparse matrix, the following two datastructures
            # are returned as instances of the soft-deprecated numpy.matrix
            # class. Note that this problem does not occur when X is a scipy
            # sparse array (or another other kind of supported array).
            # TODO: remove the following two lines when scikit-learn only
            # depends on scipy versions that no longer support scipy.sparse
            # matrices.
            eigenvals = xp.reshape(xp.asarray(eigenvals), (-1,))
            eigenvecs = xp.asarray(eigenvecs)
            # eigh returns eigenpairs in ascending eigenvalue order; flip to
            # match the descending convention of the SVD-based solvers.
            eigenvals = xp.flip(eigenvals, axis=0)
            eigenvecs = xp.flip(eigenvecs, axis=1)
            # The covariance matrix C is positive semi-definite by
            # construction. However, the eigenvalues returned by xp.linalg.eigh
            # can be slightly negative due to numerical errors. This would be
            # an issue for the subsequent sqrt, hence the manual clipping.
            eigenvals[eigenvals < 0.0] = 0.0
            explained_variance_ = eigenvals
            # Re-construct SVD of centered X indirectly and make it consistent
            # with the other solvers.
            S = xp.sqrt(eigenvals * (n_samples - 1))
            Vt = eigenvecs.T
            U = None
        # flip eigenvectors' sign to enforce deterministic output
        U, Vt = svd_flip(U, Vt, u_based_decision=False)
        components_ = Vt
        # Get variance explained by singular values
        total_var = xp.sum(explained_variance_)
        explained_variance_ratio_ = explained_variance_ / total_var
        singular_values_ = xp.asarray(S, copy=True)  # Store the singular values.
        # Postprocess the number of components required
        if n_components == "mle":
            n_components = _infer_dimension(explained_variance_, n_samples)
        elif 0 < n_components < 1.0:
            # number of components for which the cumulated explained
            # variance percentage is superior to the desired threshold
            # side='right' ensures that number of features selected
            # their variance is always greater than n_components float
            # passed. More discussion in issue: #15669
            if is_array_api_compliant:
                # Convert to numpy as xp.cumsum and xp.searchsorted are not
                # part of the Array API standard yet:
                #
                # https://github.com/data-apis/array-api/issues/597
                # https://github.com/data-apis/array-api/issues/688
                #
                # Furthermore, it's not always safe to call them for namespaces
                # that already implement them: for instance as
                # cupy.searchsorted does not accept a float as second argument.
                explained_variance_ratio_np = _convert_to_numpy(
                    explained_variance_ratio_, xp=xp
                )
            else:
                explained_variance_ratio_np = explained_variance_ratio_
            ratio_cumsum = stable_cumsum(explained_variance_ratio_np)
            n_components = np.searchsorted(ratio_cumsum, n_components, side="right") + 1
        # Compute noise covariance using Probabilistic PCA model
        # The sigma2 maximum likelihood (cf. eq. 12.46)
        if n_components < min(n_features, n_samples):
            self.noise_variance_ = xp.mean(explained_variance_[n_components:])
        else:
            self.noise_variance_ = 0.0
        self.n_samples_ = n_samples
        self.n_components_ = n_components
        # Assign a copy of the result of the truncation of the components in
        # order to:
        # - release the memory used by the discarded components,
        # - ensure that the kept components are allocated contiguously in
        #   memory to make the transform method faster by leveraging cache
        #   locality.
        self.components_ = xp.asarray(components_[:n_components, :], copy=True)
        # We do the same for the other arrays for the sake of consistency.
        self.explained_variance_ = xp.asarray(
            explained_variance_[:n_components], copy=True
        )
        self.explained_variance_ratio_ = xp.asarray(
            explained_variance_ratio_[:n_components], copy=True
        )
        self.singular_values_ = xp.asarray(singular_values_[:n_components], copy=True)
        return U, S, Vt, X, x_is_centered, xp
    def _fit_truncated(self, X, n_components, xp):
        """Fit the model by computing truncated SVD (by ARPACK or randomized)
        on X.
        """
        n_samples, n_features = X.shape
        svd_solver = self._fit_svd_solver
        if isinstance(n_components, str):
            raise ValueError(
                "n_components=%r cannot be a string with svd_solver='%s'"
                % (n_components, svd_solver)
            )
        elif not 1 <= n_components <= min(n_samples, n_features):
            raise ValueError(
                "n_components=%r must be between 1 and "
                "min(n_samples, n_features)=%r with "
                "svd_solver='%s'"
                % (n_components, min(n_samples, n_features), svd_solver)
            )
        elif svd_solver == "arpack" and n_components == min(n_samples, n_features):
            raise ValueError(
                "n_components=%r must be strictly less than "
                "min(n_samples, n_features)=%r with "
                "svd_solver='%s'"
                % (n_components, min(n_samples, n_features), svd_solver)
            )
        random_state = check_random_state(self.random_state)
        # Center data
        total_var = None
        if issparse(X):
            # Sparse input is never centered in place: the centering is
            # applied implicitly via a linear operator wrapper.
            self.mean_, var = mean_variance_axis(X, axis=0)
            total_var = var.sum() * n_samples / (n_samples - 1)  # ddof=1
            X_centered = _implicit_column_offset(X, self.mean_)
            x_is_centered = False
        else:
            self.mean_ = xp.mean(X, axis=0)
            X_centered = xp.asarray(X, copy=True) if self.copy else X
            X_centered -= self.mean_
            x_is_centered = not self.copy
        if svd_solver == "arpack":
            v0 = _init_arpack_v0(min(X.shape), random_state)
            U, S, Vt = svds(X_centered, k=n_components, tol=self.tol, v0=v0)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            S = S[::-1]
            # flip eigenvectors' sign to enforce deterministic output
            U, Vt = svd_flip(U[:, ::-1], Vt[::-1], u_based_decision=False)
        elif svd_solver == "randomized":
            # sign flipping is done inside
            U, S, Vt = _randomized_svd(
                X_centered,
                n_components=n_components,
                n_oversamples=self.n_oversamples,
                n_iter=self.iterated_power,
                power_iteration_normalizer=self.power_iteration_normalizer,
                flip_sign=False,
                random_state=random_state,
            )
            U, Vt = svd_flip(U, Vt, u_based_decision=False)
        self.n_samples_ = n_samples
        self.components_ = Vt
        self.n_components_ = n_components
        # Get variance explained by singular values
        self.explained_variance_ = (S**2) / (n_samples - 1)
        # Workaround in-place variance calculation since at the time numpy
        # did not have a way to calculate variance in-place.
        #
        # TODO: update this code to either:
        # * Use the array-api variance calculation, unless memory usage suffers
        # * Update sklearn.utils.extmath._incremental_mean_and_var to support array-api
        # See: https://github.com/scikit-learn/scikit-learn/pull/18689#discussion_r1335540991
        if total_var is None:
            N = X.shape[0] - 1
            X_centered **= 2
            total_var = xp.sum(X_centered) / N
        self.explained_variance_ratio_ = self.explained_variance_ / total_var
        self.singular_values_ = xp.asarray(S, copy=True)  # Store the singular values.
        # Average of the discarded eigenvalues estimates the noise variance
        # (probabilistic PCA model).
        if self.n_components_ < min(n_features, n_samples):
            self.noise_variance_ = total_var - xp.sum(self.explained_variance_)
            self.noise_variance_ /= min(n_features, n_samples) - n_components
        else:
            self.noise_variance_ = 0.0
        return U, S, Vt, X, x_is_centered, xp
    def score_samples(self, X):
        """Return the log-likelihood of each sample.
        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data.
        Returns
        -------
        ll : ndarray of shape (n_samples,)
            Log-likelihood of each sample under the current model.
        """
        check_is_fitted(self)
        xp, _ = get_namespace(X)
        X = validate_data(self, X, dtype=[xp.float64, xp.float32], reset=False)
        Xr = X - self.mean_
        n_features = X.shape[1]
        precision = self.get_precision()
        # Gaussian log-density of the centered samples under the fitted
        # probabilistic PCA model (quadratic form + normalization constant).
        log_like = -0.5 * xp.sum(Xr * (Xr @ precision), axis=1)
        log_like -= 0.5 * (n_features * log(2.0 * np.pi) - fast_logdet(precision))
        return log_like
    def score(self, X, y=None):
        """Return the average log-likelihood of all samples.
        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data.
        y : Ignored
            Ignored.
        Returns
        -------
        ll : float
            Average log-likelihood of the samples under the current model.
        """
        xp, _ = get_namespace(X)
        return float(xp.mean(self.score_samples(X)))
    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.transformer_tags.preserves_dtype = ["float64", "float32"]
        tags.array_api_support = True
        # Sparse input is only handled by the solvers listed here (see _fit).
        tags.input_tags.sparse = self.svd_solver in (
            "auto",
            "arpack",
            "covariance_eigh",
        )
        return tags

View File

@@ -0,0 +1,548 @@
"""Matrix factorization with Sparse PCA."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral, Real
import numpy as np
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..linear_model import ridge_regression
from ..utils import check_random_state
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import svd_flip
from ..utils.validation import check_array, check_is_fitted, validate_data
from ._dict_learning import MiniBatchDictionaryLearning, dict_learning
class _BaseSparsePCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Base class for SparsePCA and MiniBatchSparsePCA"""

    # Declarative parameter validation consumed by `@_fit_context`;
    # subclasses extend this mapping with their own parameters.
    _parameter_constraints: dict = {
        "n_components": [None, Interval(Integral, 1, None, closed="left")],
        "alpha": [Interval(Real, 0.0, None, closed="left")],
        "ridge_alpha": [Interval(Real, 0.0, None, closed="left")],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "tol": [Interval(Real, 0.0, None, closed="left")],
        "method": [StrOptions({"lars", "cd"})],
        "n_jobs": [Integral, None],
        "verbose": ["verbose"],
        "random_state": ["random_state"],
    }

    def __init__(
        self,
        n_components=None,
        *,
        alpha=1,
        ridge_alpha=0.01,
        max_iter=1000,
        tol=1e-8,
        method="lars",
        n_jobs=None,
        verbose=False,
        random_state=None,
    ):
        # Per sklearn convention, __init__ only stores hyper-parameters;
        # validation happens at fit time.
        self.n_components = n_components
        self.alpha = alpha
        self.ridge_alpha = ridge_alpha
        self.max_iter = max_iter
        self.tol = tol
        self.method = method
        self.n_jobs = n_jobs
        self.verbose = verbose
        self.random_state = random_state

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None):
        """Fit the model from data in X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        random_state = check_random_state(self.random_state)
        X = validate_data(self, X)

        # The factorization is fitted on centered data; `transform` and
        # `inverse_transform` apply/undo the same centering via `mean_`.
        self.mean_ = X.mean(axis=0)
        X = X - self.mean_

        # n_components=None means "as many components as features".
        if self.n_components is None:
            n_components = X.shape[1]
        else:
            n_components = self.n_components

        # The actual factorization is implemented by each subclass.
        return self._fit(X, n_components, random_state)

    def transform(self, X):
        """Least Squares projection of the data onto the sparse components.

        To avoid instability issues in case the system is under-determined,
        regularization can be applied (Ridge regression) via the
        `ridge_alpha` parameter.

        Note that Sparse PCA components orthogonality is not enforced as in PCA
        hence one cannot use a simple linear projection.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Test data to be transformed, must have the same number of
            features as the data used to train the model.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Transformed data.
        """
        check_is_fitted(self)

        X = validate_data(self, X, reset=False)
        X = X - self.mean_

        # Sparse components are not orthogonal, so projection is a (ridge-
        # regularized) least-squares problem rather than a matrix product.
        U = ridge_regression(
            self.components_.T, X.T, self.ridge_alpha, solver="cholesky"
        )

        return U

    def inverse_transform(self, X):
        """Transform data from the latent space to the original space.

        This inversion is an approximation due to the loss of information
        induced by the forward decomposition.

        .. versionadded:: 1.2

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_components)
            Data in the latent space.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            Reconstructed data in the original space.
        """
        check_is_fitted(self)
        X = check_array(X)

        # Linear reconstruction plus the centering removed at fit time.
        return (X @ self.components_) + self.mean_

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.transformer_tags.preserves_dtype = ["float64", "float32"]
        return tags
class SparsePCA(_BaseSparsePCA):
    """Sparse Principal Components Analysis (SparsePCA).

    Finds the set of sparse components that can optimally reconstruct
    the data. The amount of sparseness is controllable by the coefficient
    of the L1 penalty, given by the parameter alpha.

    Read more in the :ref:`User Guide <SparsePCA>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of sparse atoms to extract. If None, then ``n_components``
        is set to ``n_features``.

    alpha : float, default=1
        Sparsity controlling parameter. Higher values lead to sparser
        components.

    ridge_alpha : float, default=0.01
        Amount of ridge shrinkage to apply in order to improve
        conditioning when calling the transform method.

    max_iter : int, default=1000
        Maximum number of iterations to perform.

    tol : float, default=1e-8
        Tolerance for the stopping condition.

    method : {'lars', 'cd'}, default='lars'
        Method to be used for optimization.
        lars: uses the least angle regression method to solve the lasso problem
        (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    U_init : ndarray of shape (n_samples, n_components), default=None
        Initial values for the loadings for warm restart scenarios. Only used
        if `U_init` and `V_init` are not None.

    V_init : ndarray of shape (n_components, n_features), default=None
        Initial values for the components for warm restart scenarios. Only used
        if `U_init` and `V_init` are not None.

    verbose : int or bool, default=False
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    random_state : int, RandomState instance or None, default=None
        Used during dictionary learning. Pass an int for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Sparse components extracted from the data.

    error_ : ndarray
        Vector of errors at each iteration.

    n_components_ : int
        Estimated number of components.

        .. versionadded:: 0.23

    n_iter_ : int
        Number of iterations run.

    mean_ : ndarray of shape (n_features,)
        Per-feature empirical mean, estimated from the training set.
        Equal to ``X.mean(axis=0)``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    PCA : Principal Component Analysis implementation.
    MiniBatchSparsePCA : Mini batch variant of `SparsePCA` that is faster but less
        accurate.
    DictionaryLearning : Generic dictionary learning problem using a sparse code.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.decomposition import SparsePCA
    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
    >>> transformer = SparsePCA(n_components=5, random_state=0)
    >>> transformer.fit(X)
    SparsePCA(...)
    >>> X_transformed = transformer.transform(X)
    >>> X_transformed.shape
    (200, 5)
    >>> # most values in the components_ are zero (sparsity)
    >>> np.mean(transformer.components_ == 0)
    np.float64(0.9666)
    """

    # Extend the base constraints with the warm-start initializers.
    _parameter_constraints: dict = {
        **_BaseSparsePCA._parameter_constraints,
        "U_init": [None, np.ndarray],
        "V_init": [None, np.ndarray],
    }

    def __init__(
        self,
        n_components=None,
        *,
        alpha=1,
        ridge_alpha=0.01,
        max_iter=1000,
        tol=1e-8,
        method="lars",
        n_jobs=None,
        U_init=None,
        V_init=None,
        verbose=False,
        random_state=None,
    ):
        super().__init__(
            n_components=n_components,
            alpha=alpha,
            ridge_alpha=ridge_alpha,
            max_iter=max_iter,
            tol=tol,
            method=method,
            n_jobs=n_jobs,
            verbose=verbose,
            random_state=random_state,
        )
        self.U_init = U_init
        self.V_init = V_init

    def _fit(self, X, n_components, random_state):
        """Specialized `fit` for SparsePCA."""
        # Sparse PCA is solved as dictionary learning on X.T, which swaps
        # the roles of code and dictionary: the learned "code" holds the
        # sparse components. The warm-start arrays are transposed to match.
        code_init = self.V_init.T if self.V_init is not None else None
        dict_init = self.U_init.T if self.U_init is not None else None
        code, dictionary, E, self.n_iter_ = dict_learning(
            X.T,
            n_components,
            alpha=self.alpha,
            tol=self.tol,
            max_iter=self.max_iter,
            method=self.method,
            n_jobs=self.n_jobs,
            verbose=self.verbose,
            random_state=random_state,
            code_init=code_init,
            dict_init=dict_init,
            return_n_iter=True,
        )
        # flip eigenvectors' sign to enforce deterministic output
        code, dictionary = svd_flip(code, dictionary, u_based_decision=True)
        self.components_ = code.T
        # Normalize each component to unit L2 norm in place; all-zero rows
        # get a fake norm of 1 to avoid division by zero.
        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]
        components_norm[components_norm == 0] = 1
        self.components_ /= components_norm
        self.n_components_ = len(self.components_)

        self.error_ = E
        return self
class MiniBatchSparsePCA(_BaseSparsePCA):
    """Mini-batch Sparse Principal Components Analysis.

    Finds the set of sparse components that can optimally reconstruct
    the data. The amount of sparseness is controllable by the coefficient
    of the L1 penalty, given by the parameter alpha.

    For an example comparing sparse PCA to PCA, see
    :ref:`sphx_glr_auto_examples_decomposition_plot_faces_decomposition.py`

    Read more in the :ref:`User Guide <SparsePCA>`.

    Parameters
    ----------
    n_components : int, default=None
        Number of sparse atoms to extract. If None, then ``n_components``
        is set to ``n_features``.

    alpha : int, default=1
        Sparsity controlling parameter. Higher values lead to sparser
        components.

    ridge_alpha : float, default=0.01
        Amount of ridge shrinkage to apply in order to improve
        conditioning when calling the transform method.

    max_iter : int, default=1_000
        Maximum number of iterations over the complete dataset before
        stopping independently of any early stopping criterion heuristics.

        .. versionadded:: 1.2

    callback : callable, default=None
        Callable that gets invoked every five iterations.

    batch_size : int, default=3
        The number of features to take in each mini batch.

    verbose : int or bool, default=False
        Controls the verbosity; the higher, the more messages. Defaults to 0.

    shuffle : bool, default=True
        Whether to shuffle the data before splitting it in batches.

    n_jobs : int, default=None
        Number of parallel jobs to run.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    method : {'lars', 'cd'}, default='lars'
        Method to be used for optimization.
        lars: uses the least angle regression method to solve the lasso problem
        (linear_model.lars_path)
        cd: uses the coordinate descent method to compute the
        Lasso solution (linear_model.Lasso). Lars will be faster if
        the estimated components are sparse.

    random_state : int, RandomState instance or None, default=None
        Used for random shuffling when ``shuffle`` is set to ``True``,
        during online dictionary learning. Pass an int for reproducible results
        across multiple function calls.
        See :term:`Glossary <random_state>`.

    tol : float, default=1e-3
        Control early stopping based on the norm of the differences in the
        dictionary between 2 steps.

        To disable early stopping based on changes in the dictionary, set
        `tol` to 0.0.

        .. versionadded:: 1.1

    max_no_improvement : int or None, default=10
        Control early stopping based on the consecutive number of mini batches
        that does not yield an improvement on the smoothed cost function.

        To disable convergence detection based on cost function, set
        `max_no_improvement` to `None`.

        .. versionadded:: 1.1

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        Sparse components extracted from the data.

    n_components_ : int
        Estimated number of components.

        .. versionadded:: 0.23

    n_iter_ : int
        Number of iterations run.

    mean_ : ndarray of shape (n_features,)
        Per-feature empirical mean, estimated from the training set.
        Equal to ``X.mean(axis=0)``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    IncrementalPCA : Incremental principal components analysis.
    PCA : Principal component analysis.
    SparsePCA : Sparse Principal Components Analysis.
    TruncatedSVD : Dimensionality reduction using truncated SVD.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.decomposition import MiniBatchSparsePCA
    >>> X, _ = make_friedman1(n_samples=200, n_features=30, random_state=0)
    >>> transformer = MiniBatchSparsePCA(n_components=5, batch_size=50,
    ...                                  max_iter=10, random_state=0)
    >>> transformer.fit(X)
    MiniBatchSparsePCA(...)
    >>> X_transformed = transformer.transform(X)
    >>> X_transformed.shape
    (200, 5)
    >>> # most values in the components_ are zero (sparsity)
    >>> np.mean(transformer.components_ == 0)
    np.float64(0.9)
    """

    # Extend the base constraints with the mini-batch specific parameters;
    # `max_iter` is redeclared with the same interval as in the base class.
    _parameter_constraints: dict = {
        **_BaseSparsePCA._parameter_constraints,
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "callback": [None, callable],
        "batch_size": [Interval(Integral, 1, None, closed="left")],
        "shuffle": ["boolean"],
        "max_no_improvement": [Interval(Integral, 0, None, closed="left"), None],
    }

    def __init__(
        self,
        n_components=None,
        *,
        alpha=1,
        ridge_alpha=0.01,
        max_iter=1_000,
        callback=None,
        batch_size=3,
        verbose=False,
        shuffle=True,
        n_jobs=None,
        method="lars",
        random_state=None,
        tol=1e-3,
        max_no_improvement=10,
    ):
        super().__init__(
            n_components=n_components,
            alpha=alpha,
            ridge_alpha=ridge_alpha,
            max_iter=max_iter,
            tol=tol,
            method=method,
            n_jobs=n_jobs,
            verbose=verbose,
            random_state=random_state,
        )
        self.callback = callback
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.max_no_improvement = max_no_improvement

    def _fit(self, X, n_components, random_state):
        """Specialized `fit` for MiniBatchSparsePCA."""
        # As in SparsePCA, the problem is solved as (mini-batch) dictionary
        # learning on X.T; the transform of X.T then yields the sparse
        # components.
        transform_algorithm = "lasso_" + self.method
        est = MiniBatchDictionaryLearning(
            n_components=n_components,
            alpha=self.alpha,
            max_iter=self.max_iter,
            dict_init=None,
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            n_jobs=self.n_jobs,
            fit_algorithm=self.method,
            random_state=random_state,
            transform_algorithm=transform_algorithm,
            transform_alpha=self.alpha,
            verbose=self.verbose,
            callback=self.callback,
            tol=self.tol,
            max_no_improvement=self.max_no_improvement,
        )
        # Force ndarray output regardless of any global set_output config.
        est.set_output(transform="default")
        est.fit(X.T)

        self.components_, self.n_iter_ = est.transform(X.T).T, est.n_iter_

        # Normalize each component to unit L2 norm in place; all-zero rows
        # get a fake norm of 1 to avoid division by zero.
        components_norm = np.linalg.norm(self.components_, axis=1)[:, np.newaxis]
        components_norm[components_norm == 0] = 1
        self.components_ /= components_norm
        self.n_components_ = len(self.components_)

        return self

View File

@@ -0,0 +1,322 @@
"""Truncated SVD for sparse matrices, aka latent semantic analysis (LSA)."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Integral, Real
import numpy as np
import scipy.sparse as sp
from scipy.sparse.linalg import svds
from ..base import (
BaseEstimator,
ClassNamePrefixFeaturesOutMixin,
TransformerMixin,
_fit_context,
)
from ..utils import check_array, check_random_state
from ..utils._arpack import _init_arpack_v0
from ..utils._param_validation import Interval, StrOptions
from ..utils.extmath import _randomized_svd, safe_sparse_dot, svd_flip
from ..utils.sparsefuncs import mean_variance_axis
from ..utils.validation import check_is_fitted, validate_data
__all__ = ["TruncatedSVD"]
class TruncatedSVD(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
    """Dimensionality reduction using truncated SVD (aka LSA).

    This transformer performs linear dimensionality reduction by means of
    truncated singular value decomposition (SVD). Contrary to PCA, this
    estimator does not center the data before computing the singular value
    decomposition. This means it can work with sparse matrices
    efficiently.

    In particular, truncated SVD works on term count/tf-idf matrices as
    returned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In
    that context, it is known as latent semantic analysis (LSA).

    This estimator supports two algorithms: a fast randomized SVD solver, and
    a "naive" algorithm that uses ARPACK as an eigensolver on `X * X.T` or
    `X.T * X`, whichever is more efficient.

    Read more in the :ref:`User Guide <LSA>`.

    Parameters
    ----------
    n_components : int, default=2
        Desired dimensionality of output data.
        If algorithm='arpack', must be strictly less than the number of features.
        If algorithm='randomized', must be less than or equal to the number of features.
        The default value is useful for visualisation. For LSA, a value of
        100 is recommended.

    algorithm : {'arpack', 'randomized'}, default='randomized'
        SVD solver to use. Either "arpack" for the ARPACK wrapper in SciPy
        (scipy.sparse.linalg.svds), or "randomized" for the randomized
        algorithm due to Halko (2009).

    n_iter : int, default=5
        Number of iterations for randomized SVD solver. Not used by ARPACK. The
        default is larger than the default in
        :func:`~sklearn.utils.extmath.randomized_svd` to handle sparse
        matrices that may have large slowly decaying spectrum.

    n_oversamples : int, default=10
        Number of oversamples for randomized SVD solver. Not used by ARPACK.
        See :func:`~sklearn.utils.extmath.randomized_svd` for a complete
        description.

        .. versionadded:: 1.1

    power_iteration_normalizer : {'auto', 'QR', 'LU', 'none'}, default='auto'
        Power iteration normalizer for randomized SVD solver.
        Not used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`
        for more details.

        .. versionadded:: 1.1

    random_state : int, RandomState instance or None, default=None
        Used during randomized svd. Pass an int for reproducible results across
        multiple function calls.
        See :term:`Glossary <random_state>`.

    tol : float, default=0.0
        Tolerance for ARPACK. 0 means machine precision. Ignored by randomized
        SVD solver.

    Attributes
    ----------
    components_ : ndarray of shape (n_components, n_features)
        The right singular vectors of the input data.

    explained_variance_ : ndarray of shape (n_components,)
        The variance of the training samples transformed by a projection to
        each component.

    explained_variance_ratio_ : ndarray of shape (n_components,)
        Percentage of variance explained by each of the selected components.

    singular_values_ : ndarray of shape (n_components,)
        The singular values corresponding to each of the selected components.
        The singular values are equal to the 2-norms of the ``n_components``
        variables in the lower-dimensional space.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    DictionaryLearning : Find a dictionary that sparsely encodes data.
    FactorAnalysis : A simple linear generative model with
        Gaussian latent variables.
    IncrementalPCA : Incremental principal components analysis.
    KernelPCA : Kernel Principal component analysis.
    NMF : Non-Negative Matrix Factorization.
    PCA : Principal component analysis.

    Notes
    -----
    SVD suffers from a problem called "sign indeterminacy", which means the
    sign of the ``components_`` and the output from transform depend on the
    algorithm and random state. To work around this, fit instances of this
    class to data once, then keep the instance around to do transformations.

    References
    ----------
    :arxiv:`Halko, et al. (2009). "Finding structure with randomness:
    Stochastic algorithms for constructing approximate matrix decompositions"
    <0909.4061>`

    Examples
    --------
    >>> from sklearn.decomposition import TruncatedSVD
    >>> from scipy.sparse import csr_matrix
    >>> import numpy as np
    >>> np.random.seed(0)
    >>> X_dense = np.random.rand(100, 100)
    >>> X_dense[:, 2 * np.arange(50)] = 0
    >>> X = csr_matrix(X_dense)
    >>> svd = TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    >>> svd.fit(X)
    TruncatedSVD(n_components=5, n_iter=7, random_state=42)
    >>> print(svd.explained_variance_ratio_)
    [0.0157 0.0512 0.0499 0.0479 0.0453]
    >>> print(svd.explained_variance_ratio_.sum())
    0.2102
    >>> print(svd.singular_values_)
    [35.2410  4.5981  4.5420  4.4486  4.3288]
    """

    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "algorithm": [StrOptions({"arpack", "randomized"})],
        "n_iter": [Interval(Integral, 0, None, closed="left")],
        "n_oversamples": [Interval(Integral, 1, None, closed="left")],
        # "QR" (not "OR"): must match the options documented above and
        # accepted by sklearn.utils.extmath.randomized_svd, otherwise the
        # valid value "QR" would be rejected at fit time.
        "power_iteration_normalizer": [StrOptions({"auto", "QR", "LU", "none"})],
        "random_state": ["random_state"],
        "tol": [Interval(Real, 0, None, closed="left")],
    }

    def __init__(
        self,
        n_components=2,
        *,
        algorithm="randomized",
        n_iter=5,
        n_oversamples=10,
        power_iteration_normalizer="auto",
        random_state=None,
        tol=0.0,
    ):
        self.algorithm = algorithm
        self.n_components = n_components
        self.n_iter = n_iter
        self.n_oversamples = n_oversamples
        self.power_iteration_normalizer = power_iteration_normalizer
        self.random_state = random_state
        self.tol = tol

    def fit(self, X, y=None):
        """Fit model on training data X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        self : object
            Returns the transformer object.
        """
        self.fit_transform(X)
        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def fit_transform(self, X, y=None):
        """Fit model to X and perform dimensionality reduction on X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : Ignored
            Not used, present here for API consistency by convention.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Reduced version of X. This will always be a dense array.
        """
        X = validate_data(self, X, accept_sparse=["csr", "csc"], ensure_min_features=2)
        random_state = check_random_state(self.random_state)

        if self.algorithm == "arpack":
            v0 = _init_arpack_v0(min(X.shape), random_state)
            U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol, v0=v0)
            # svds doesn't abide by scipy.linalg.svd/randomized_svd
            # conventions, so reverse its outputs.
            Sigma = Sigma[::-1]
            # u_based_decision=False is needed to be consistent with PCA.
            U, VT = svd_flip(U[:, ::-1], VT[::-1], u_based_decision=False)

        elif self.algorithm == "randomized":
            if self.n_components > X.shape[1]:
                raise ValueError(
                    f"n_components({self.n_components}) must be <="
                    f" n_features({X.shape[1]})."
                )
            U, Sigma, VT = _randomized_svd(
                X,
                self.n_components,
                n_iter=self.n_iter,
                n_oversamples=self.n_oversamples,
                power_iteration_normalizer=self.power_iteration_normalizer,
                random_state=random_state,
                flip_sign=False,
            )
            U, VT = svd_flip(U, VT, u_based_decision=False)

        self.components_ = VT

        # As a result of the SVD approximation error on X ~ U @ Sigma @ V.T,
        # X @ V is not the same as U @ Sigma
        if self.algorithm == "randomized" or (
            self.algorithm == "arpack" and self.tol > 0
        ):
            X_transformed = safe_sparse_dot(X, self.components_.T)
        else:
            X_transformed = U * Sigma

        # Calculate explained variance & explained variance ratio
        self.explained_variance_ = exp_var = np.var(X_transformed, axis=0)
        if sp.issparse(X):
            _, full_var = mean_variance_axis(X, axis=0)
            full_var = full_var.sum()
        else:
            full_var = np.var(X, axis=0).sum()
        self.explained_variance_ratio_ = exp_var / full_var
        self.singular_values_ = Sigma  # Store the singular values.

        return X_transformed

    def transform(self, X):
        """Perform dimensionality reduction on X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data.

        Returns
        -------
        X_new : ndarray of shape (n_samples, n_components)
            Reduced version of X. This will always be a dense array.
        """
        check_is_fitted(self)
        X = validate_data(self, X, accept_sparse=["csr", "csc"], reset=False)
        return safe_sparse_dot(X, self.components_.T)

    def inverse_transform(self, X):
        """Transform X back to its original space.

        Returns an array X_original whose transform would be X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_components)
            New data.

        Returns
        -------
        X_original : ndarray of shape (n_samples, n_features)
            Note that this is always a dense array.
        """
        X = check_array(X)
        return np.dot(X, self.components_)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.input_tags.sparse = True
        tags.transformer_tags.preserves_dtype = ["float64", "float32"]
        return tags

    @property
    def _n_features_out(self):
        """Number of transformed output features."""
        return self.components_.shape[0]

View File

@@ -0,0 +1,14 @@
# Cython extension with the fast inner routines for online Latent Dirichlet
# Allocation (_online_lda_fast.pyx); also depends on the shared utils
# Cython tree.
py.extension_module(
  '_online_lda_fast',
  [cython_gen.process('_online_lda_fast.pyx'), utils_cython_tree],
  subdir: 'sklearn/decomposition',
  install: true
)

# Cython extension with the coordinate-descent helper for NMF
# (_cdnmf_fast.pyx); needs the NumPy headers.
py.extension_module(
  '_cdnmf_fast',
  cython_gen.process('_cdnmf_fast.pyx'),
  dependencies: [np_dep],
  subdir: 'sklearn/decomposition',
  install: true
)

View File

@@ -0,0 +1,988 @@
import itertools
import warnings
from functools import partial
import numpy as np
import pytest
import sklearn
from sklearn.base import clone
from sklearn.decomposition import (
DictionaryLearning,
MiniBatchDictionaryLearning,
SparseCoder,
dict_learning,
dict_learning_online,
sparse_encode,
)
from sklearn.decomposition._dict_learning import _update_dict
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils import check_array
from sklearn.utils._testing import (
TempMemmap,
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
from sklearn.utils.estimator_checks import (
check_transformer_data_not_an_array,
check_transformer_general,
check_transformers_unfitted,
)
from sklearn.utils.parallel import Parallel
rng_global = np.random.RandomState(0)
n_samples, n_features = 10, 8
X = rng_global.randn(n_samples, n_features)
def test_sparse_encode_shapes_omp():
    # Whatever the coding algorithm and parallelism level, the returned code
    # matrix must have shape (n_samples, n_components).
    rng = np.random.RandomState(0)
    coding_methods = ["omp", "lasso_lars", "lasso_cd", "lars", "threshold"]
    for n_atoms, n_rows in itertools.product([1, 5], [1, 9]):
        data = rng.randn(n_rows, n_features)
        atoms = rng.randn(n_atoms, n_features)
        for method, jobs in itertools.product(coding_methods, [1, 2]):
            codes = sparse_encode(data, atoms, algorithm=method, n_jobs=jobs)
            assert codes.shape == (n_rows, n_atoms)
def test_dict_learning_shapes():
    # components_ must be (n_components, n_features) both for a multi-atom
    # and for a single-atom dictionary; transform keeps the sample count.
    for n_atoms in (5, 1):
        estimator = DictionaryLearning(n_atoms, random_state=0).fit(X)
        assert estimator.components_.shape == (n_atoms, n_features)
    assert estimator.transform(X).shape == (X.shape[0], 1)
def test_dict_learning_overcomplete():
    # An over-complete dictionary (more atoms than features) must still be
    # learned with the requested number of atoms.
    n_atoms = 12
    fitted = DictionaryLearning(n_atoms, random_state=0).fit(X)
    assert fitted.components_.shape == (n_atoms, n_features)
def test_max_iter():
    # Build a sparse-coding problem on a Ricker-wavelet dictionary that is
    # hard enough for lasso_cd: with transform_max_iter=1 the solver must emit
    # a ConvergenceWarning, with a large budget it must converge warning-free.
    def ricker_function(resolution, center, width):
        """Discrete sub-sampled Ricker (Mexican hat) wavelet"""
        x = np.linspace(0, resolution - 1, resolution)
        x = (
            (2 / (np.sqrt(3 * width) * np.pi**0.25))
            * (1 - (x - center) ** 2 / width**2)
            * np.exp(-((x - center) ** 2) / (2 * width**2))
        )
        return x

    def ricker_matrix(width, resolution, n_components):
        """Dictionary of Ricker (Mexican hat) wavelets"""
        centers = np.linspace(0, resolution - 1, n_components)
        D = np.empty((n_components, resolution))
        for i, center in enumerate(centers):
            D[i] = ricker_function(resolution, center, width)
        # Normalize every atom to unit L2 norm.
        D /= np.sqrt(np.sum(D**2, axis=1))[:, np.newaxis]
        return D

    transform_algorithm = "lasso_cd"
    resolution = 1024
    subsampling = 3  # subsampling factor
    n_components = resolution // subsampling

    # Compute a wavelet dictionary: five widths, stacked row-wise.
    D_multi = np.r_[
        tuple(
            ricker_matrix(
                width=w, resolution=resolution, n_components=n_components // 5
            )
            for w in (10, 50, 100, 500, 1000)
        )
    ]

    # Piecewise-constant test signal: +3 on the first quarter, -1 elsewhere.
    X = np.linspace(0, resolution - 1, resolution)
    first_quarter = X < resolution / 4
    X[first_quarter] = 3.0
    X[np.logical_not(first_quarter)] = -1.0
    X = X.reshape(1, -1)

    # check that the underlying model fails to converge
    with pytest.warns(ConvergenceWarning):
        model = SparseCoder(
            D_multi, transform_algorithm=transform_algorithm, transform_max_iter=1
        )
        model.fit_transform(X)

    # check that the underlying model converges w/o warnings
    with warnings.catch_warnings():
        warnings.simplefilter("error", ConvergenceWarning)
        model = SparseCoder(
            D_multi, transform_algorithm=transform_algorithm, transform_max_iter=2000
        )
        model.fit_transform(X)
def test_dict_learning_lars_positive_parameter():
    # lars cannot honour a positivity constraint on the code, so the call
    # must fail loudly instead of silently ignoring the flag.
    expected = "Positive constraint not supported for 'lars' coding method."
    with pytest.raises(ValueError, match=expected):
        dict_learning(X, 5, alpha=1, positive_code=True)
@pytest.mark.parametrize(
    "transform_algorithm",
    [
        "lasso_lars",
        "lasso_cd",
        "threshold",
    ],
)
@pytest.mark.parametrize("positive_code", [False, True])
@pytest.mark.parametrize("positive_dict", [False, True])
def test_dict_learning_positivity(transform_algorithm, positive_code, positive_dict):
    # When a positivity flag is on, the matching factor must be non-negative
    # everywhere; when off, it is expected to go negative somewhere.
    estimator = DictionaryLearning(
        5,
        transform_algorithm=transform_algorithm,
        random_state=0,
        positive_code=positive_code,
        positive_dict=positive_dict,
        fit_algorithm="cd",
    ).fit(X)
    code = estimator.transform(X)
    checks = (
        (positive_dict, estimator.components_),
        (positive_code, code),
    )
    for must_be_positive, factor in checks:
        if must_be_positive:
            assert (factor >= 0).all()
        else:
            assert (factor < 0).any()
@pytest.mark.parametrize("positive_dict", [False, True])
def test_dict_learning_lars_dict_positivity(positive_dict):
    # With the lars transform, positive_dict alone must still constrain the
    # dictionary atoms (the code is unconstrained here).
    estimator = DictionaryLearning(
        5,
        transform_algorithm="lars",
        random_state=0,
        positive_dict=positive_dict,
        fit_algorithm="cd",
    ).fit(X)
    atoms = estimator.components_
    assert (atoms >= 0).all() if positive_dict else (atoms < 0).any()
def test_dict_learning_lars_code_positivity():
    # positive_code is incompatible with the 'lars' transform algorithm:
    # fitting succeeds, but transform must raise.
    estimator = DictionaryLearning(
        5,
        transform_algorithm="lars",
        random_state=0,
        positive_code=True,
        fit_algorithm="cd",
    ).fit(X)
    expected = "Positive constraint not supported for '{}' coding method.".format(
        "lars"
    )
    with pytest.raises(ValueError, match=expected):
        estimator.transform(X)
def test_dict_learning_reconstruction():
    # OMP codes reconstruct X almost exactly; lasso_lars only approximately.
    estimator = DictionaryLearning(
        12, transform_algorithm="omp", transform_alpha=0.001, random_state=0
    )
    code = estimator.fit(X).transform(X)
    assert_array_almost_equal(code @ estimator.components_, X)
    assert_array_almost_equal(estimator.inverse_transform(code), X)

    estimator.set_params(transform_algorithm="lasso_lars")
    code = estimator.transform(X)
    assert_array_almost_equal(code @ estimator.components_, X, decimal=2)
    assert_array_almost_equal(estimator.inverse_transform(code), X, decimal=2)

    # A code with the wrong number of atoms must be rejected.
    with pytest.raises(ValueError, match="Expected 12, got 11."):
        estimator.inverse_transform(code[:, :-1])

    # used to test lars here too, but there's no guarantee the number of
    # nonzero atoms is right.
def test_dict_learning_reconstruction_parallel():
    """Reconstruction must also work when transform runs with n_jobs > 1."""
    # regression test that parallel reconstruction works with n_jobs>1
    n_components = 12
    dico = DictionaryLearning(
        n_components,
        transform_algorithm="omp",
        transform_alpha=0.001,
        random_state=0,
        n_jobs=4,
    )
    code = dico.fit(X).transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X)
    dico.set_params(transform_algorithm="lasso_lars")
    code = dico.transform(X)
    assert_array_almost_equal(np.dot(code, dico.components_), X, decimal=2)
def test_dict_learning_lassocd_readonly_data():
    """lasso_cd must cope with read-only (memory-mapped) input arrays."""
    n_components = 12
    with TempMemmap(X) as X_read_only:
        dico = DictionaryLearning(
            n_components,
            transform_algorithm="lasso_cd",
            transform_alpha=0.001,
            random_state=0,
            n_jobs=4,
        )
        with ignore_warnings(category=ConvergenceWarning):
            code = dico.fit(X_read_only).transform(X_read_only)
            assert_array_almost_equal(
                np.dot(code, dico.components_), X_read_only, decimal=2
            )
def test_dict_learning_nonzero_coefs():
    """`transform_n_nonzero_coefs` controls code sparsity for lars and omp."""
    n_components = 4
    dico = DictionaryLearning(
        n_components,
        transform_algorithm="lars",
        transform_n_nonzero_coefs=3,
        random_state=0,
    )
    # X[np.newaxis, 1] selects the second sample as a (1, n_features) array
    code = dico.fit(X).transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3
    dico.set_params(transform_algorithm="omp")
    code = dico.transform(X[np.newaxis, 1])
    assert len(np.flatnonzero(code)) == 3
def test_dict_learning_split():
    """`split_sign` doubles the code width into positive/negative halves."""
    n_components = 5
    dico = DictionaryLearning(
        n_components, transform_algorithm="threshold", random_state=0
    )
    code = dico.fit(X).transform(X)
    Xr = dico.inverse_transform(code)
    dico.split_sign = True
    split_code = dico.transform(X)
    # positive half minus negative half recovers the plain code
    assert_array_almost_equal(
        split_code[:, :n_components] - split_code[:, n_components:], code
    )
    Xr2 = dico.inverse_transform(split_code)
    assert_array_almost_equal(Xr, Xr2)
def test_dict_learning_online_shapes():
    """dict_learning_online returns consistently shaped code/dictionary."""
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(
        X,
        n_components=n_components,
        batch_size=4,
        max_iter=10,
        method="cd",
        random_state=rng,
        return_code=True,
    )
    assert code.shape == (n_samples, n_components)
    assert dictionary.shape == (n_components, n_features)
    assert np.dot(code, dictionary).shape == X.shape
    # with return_code=False only the dictionary is returned
    dictionary = dict_learning_online(
        X,
        n_components=n_components,
        batch_size=4,
        max_iter=10,
        method="cd",
        random_state=rng,
        return_code=False,
    )
    assert dictionary.shape == (n_components, n_features)
def test_dict_learning_online_lars_positive_parameter():
    """dict_learning_online must reject positive_code for the lars method."""
    msg = "Positive constraint not supported for 'lars' coding method."
    with pytest.raises(ValueError, match=msg):
        dict_learning_online(X, batch_size=4, max_iter=10, positive_code=True)
@pytest.mark.parametrize(
    "transform_algorithm",
    [
        "lasso_lars",
        "lasso_cd",
        "threshold",
    ],
)
@pytest.mark.parametrize("positive_code", [False, True])
@pytest.mark.parametrize("positive_dict", [False, True])
def test_minibatch_dictionary_learning_positivity(
    transform_algorithm, positive_code, positive_dict
):
    """Positivity flags constrain MiniBatchDictionaryLearning atoms/codes."""
    n_components = 8
    dico = MiniBatchDictionaryLearning(
        n_components,
        batch_size=4,
        max_iter=10,
        transform_algorithm=transform_algorithm,
        random_state=0,
        positive_code=positive_code,
        positive_dict=positive_dict,
        fit_algorithm="cd",
    ).fit(X)
    code = dico.transform(X)
    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
@pytest.mark.parametrize("positive_dict", [False, True])
def test_minibatch_dictionary_learning_lars(positive_dict):
    """`positive_dict` constrains MiniBatch atoms when transforming with lars."""
    n_components = 8
    dico = MiniBatchDictionaryLearning(
        n_components,
        batch_size=4,
        max_iter=10,
        transform_algorithm="lars",
        random_state=0,
        positive_dict=positive_dict,
        fit_algorithm="cd",
    ).fit(X)
    if positive_dict:
        assert (dico.components_ >= 0).all()
    else:
        assert (dico.components_ < 0).any()
@pytest.mark.parametrize("positive_code", [False, True])
@pytest.mark.parametrize("positive_dict", [False, True])
def test_dict_learning_online_positivity(positive_code, positive_dict):
    """Positivity flags of dict_learning_online constrain code/dictionary."""
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(
        X,
        n_components=n_components,
        batch_size=4,
        method="cd",
        alpha=1,
        random_state=rng,
        positive_dict=positive_dict,
        positive_code=positive_code,
    )
    if positive_dict:
        assert (dictionary >= 0).all()
    else:
        assert (dictionary < 0).any()
    if positive_code:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
def test_dict_learning_online_verbosity():
    """Smoke-test the verbose code paths of the online dictionary learners."""
    # test verbosity for better coverage
    n_components = 5
    import sys
    from io import StringIO
    old_stdout = sys.stdout
    try:
        # silence the verbose output during the test
        sys.stdout = StringIO()
        # convergence monitoring verbosity
        dico = MiniBatchDictionaryLearning(
            n_components, batch_size=4, max_iter=5, verbose=1, tol=0.1, random_state=0
        )
        dico.fit(X)
        dico = MiniBatchDictionaryLearning(
            n_components,
            batch_size=4,
            max_iter=5,
            verbose=1,
            max_no_improvement=2,
            random_state=0,
        )
        dico.fit(X)
        # higher verbosity level
        dico = MiniBatchDictionaryLearning(
            n_components, batch_size=4, max_iter=5, verbose=2, random_state=0
        )
        dico.fit(X)
        # function API verbosity
        dict_learning_online(
            X,
            n_components=n_components,
            batch_size=4,
            alpha=1,
            verbose=1,
            random_state=0,
        )
        dict_learning_online(
            X,
            n_components=n_components,
            batch_size=4,
            alpha=1,
            verbose=2,
            random_state=0,
        )
    finally:
        # always restore stdout even if a fit raised
        sys.stdout = old_stdout
    assert dico.components_.shape == (n_components, n_features)
def test_dict_learning_online_estimator_shapes():
    """The fitted estimator exposes (n_components, n_features) atoms."""
    n_atoms = 5
    model = MiniBatchDictionaryLearning(n_atoms, batch_size=4, max_iter=5, random_state=0)
    model.fit(X)
    assert model.components_.shape == (n_atoms, n_features)
def test_dict_learning_online_overcomplete():
    """An overcomplete dictionary (n_components > n_features) can be learned."""
    n_atoms = 12
    model = MiniBatchDictionaryLearning(
        n_atoms, batch_size=4, max_iter=5, random_state=0
    ).fit(X)
    assert model.components_.shape == (n_atoms, n_features)
def test_dict_learning_online_initialization():
    """With max_iter=0 the provided dict_init is returned untouched."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    dico = MiniBatchDictionaryLearning(
        n_components, batch_size=4, max_iter=0, dict_init=V, random_state=0
    ).fit(X)
    assert_array_equal(dico.components_, V)
def test_dict_learning_online_readonly_initialization():
    """Fitting must not fail when dict_init is a read-only array."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)
    # make the init array immutable to mimic a memory-mapped input
    V.setflags(write=False)
    MiniBatchDictionaryLearning(
        n_components,
        batch_size=4,
        max_iter=1,
        dict_init=V,
        random_state=0,
        shuffle=False,
    ).fit(X)
def test_dict_learning_online_partial_fit():
    """fit and repeated one-sample partial_fit reach the same dictionary."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    dict1 = MiniBatchDictionaryLearning(
        n_components,
        max_iter=10,
        batch_size=1,
        alpha=1,
        shuffle=False,
        dict_init=V,
        max_no_improvement=None,
        tol=0.0,
        random_state=0,
    ).fit(X)
    dict2 = MiniBatchDictionaryLearning(
        n_components, alpha=1, dict_init=V, random_state=0
    )
    # replay the same 10 passes over X, one sample at a time
    for i in range(10):
        for sample in X:
            dict2.partial_fit(sample[np.newaxis, :])
    assert not np.all(sparse_encode(X, dict1.components_, alpha=1) == 0)
    assert_array_almost_equal(dict1.components_, dict2.components_, decimal=2)
    # partial_fit should ignore max_iter (#17433)
    assert dict1.n_steps_ == dict2.n_steps_ == 100
def test_sparse_encode_shapes():
    """Every solver returns an (n_samples, n_components) code matrix."""
    n_components = 12
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)  # random init
    dictionary /= np.sum(dictionary**2, axis=1)[:, np.newaxis]
    for solver in ("lasso_lars", "lasso_cd", "lars", "omp", "threshold"):
        result = sparse_encode(X, dictionary, algorithm=solver)
        assert result.shape == (n_samples, n_components)
@pytest.mark.parametrize("algo", ["lasso_lars", "lasso_cd", "threshold"])
@pytest.mark.parametrize("positive", [False, True])
def test_sparse_encode_positivity(algo, positive):
    """positive=True yields non-negative codes for solvers that support it."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    code = sparse_encode(X, V, algorithm=algo, positive=positive)
    if positive:
        assert (code >= 0).all()
    else:
        assert (code < 0).any()
@pytest.mark.parametrize("algo", ["lars", "omp"])
def test_sparse_encode_unavailable_positivity(algo):
    """Solvers without positivity support must raise a clear ValueError."""
    n_components = 12
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)  # random init
    dictionary /= np.sum(dictionary**2, axis=1)[:, np.newaxis]
    expected = "Positive constraint not supported for '{}' coding method.".format(algo)
    with pytest.raises(ValueError, match=expected):
        sparse_encode(X, dictionary, algorithm=algo, positive=True)
def test_sparse_encode_input():
    """C- and Fortran-ordered inputs must produce identical codes."""
    n_components = 100
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    Xf = check_array(X, order="F")
    for algo in ("lasso_lars", "lasso_cd", "lars", "omp", "threshold"):
        a = sparse_encode(X, V, algorithm=algo)
        b = sparse_encode(Xf, V, algorithm=algo)
        assert_array_almost_equal(a, b)
def test_sparse_encode_error():
    """With a small alpha the code is non-trivial and reconstructs X well."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    code = sparse_encode(X, V, alpha=0.001)
    assert not np.all(code == 0)
    # Frobenius reconstruction error stays small
    assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1
def test_sparse_encode_error_default_sparsity():
    """With n_nonzero_coefs=None, omp still returns a full code matrix."""
    rng = np.random.RandomState(0)
    data = rng.randn(100, 64)
    dictionary = rng.randn(2, 64)
    code = ignore_warnings(sparse_encode)(
        data, dictionary, algorithm="omp", n_nonzero_coefs=None
    )
    assert code.shape == (100, 2)
def test_sparse_coder_estimator():
    """SparseCoder fit_transform/inverse_transform round-trip sanity check."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    coder = SparseCoder(
        dictionary=V, transform_algorithm="lasso_lars", transform_alpha=0.001
    )
    code = coder.fit_transform(X)
    Xr = coder.inverse_transform(code)
    assert not np.all(code == 0)
    assert np.sqrt(np.sum((np.dot(code, V) - X) ** 2)) < 0.1
    # inverse_transform is exactly code @ dictionary
    np.testing.assert_allclose(Xr, np.dot(code, V))
def test_sparse_coder_estimator_clone():
    """clone() must deep-copy the dictionary and preserve fitted attributes."""
    n_components = 12
    rng = np.random.RandomState(0)
    V = rng.randn(n_components, n_features)  # random init
    V /= np.sum(V**2, axis=1)[:, np.newaxis]
    coder = SparseCoder(
        dictionary=V, transform_algorithm="lasso_lars", transform_alpha=0.001
    )
    cloned = clone(coder)
    assert id(cloned) != id(coder)
    np.testing.assert_allclose(cloned.dictionary, coder.dictionary)
    # the dictionary is copied, not shared
    assert id(cloned.dictionary) != id(coder.dictionary)
    assert cloned.n_components_ == coder.n_components_
    assert cloned.n_features_in_ == coder.n_features_in_
    data = np.random.rand(n_samples, n_features).astype(np.float32)
    np.testing.assert_allclose(cloned.transform(data), coder.transform(data))
def test_sparse_coder_parallel_mmap():
    """SparseCoder must accept read-only memmapped arrays in workers."""
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/5956
    # Test that SparseCoder does not error by passing reading only
    # arrays to child processes
    rng = np.random.RandomState(777)
    n_components, n_features = 40, 64
    init_dict = rng.rand(n_components, n_features)
    # Ensure that `data` is >2M. Joblib memory maps arrays
    # if they are larger than 1MB. The 4 accounts for float32
    # data type
    n_samples = int(2e6) // (4 * n_features)
    data = np.random.rand(n_samples, n_features).astype(np.float32)
    sc = SparseCoder(init_dict, transform_algorithm="omp", n_jobs=2)
    sc.fit_transform(data)
def test_sparse_coder_common_transformer():
    """Run the common transformer estimator checks against SparseCoder."""
    rng = np.random.RandomState(777)
    n_components, n_features = 40, 3
    init_dict = rng.rand(n_components, n_features)
    sc = SparseCoder(init_dict)
    check_transformer_data_not_an_array(sc.__class__.__name__, sc)
    check_transformer_general(sc.__class__.__name__, sc)
    check_transformer_general_memmap = partial(
        check_transformer_general, readonly_memmap=True
    )
    check_transformer_general_memmap(sc.__class__.__name__, sc)
    check_transformers_unfitted(sc.__class__.__name__, sc)
def test_sparse_coder_n_features_in():
    """SparseCoder reports the dictionary width as n_features_in_."""
    dictionary = np.array([[1, 2, 3], [1, 2, 3]])
    coder = SparseCoder(dictionary)
    assert coder.n_features_in_ == dictionary.shape[1]
def test_update_dict():
    """Batch and online forms of _update_dict must agree."""
    # Check the dict update in batch mode vs online mode
    # Non-regression test for #4866
    rng = np.random.RandomState(0)
    code = np.array([[0.5, -0.5], [0.1, 0.9]])
    dictionary = np.array([[1.0, 0.0], [0.6, 0.8]])
    X = np.dot(code, dictionary) + rng.randn(2, 2)
    # full batch update
    newd_batch = dictionary.copy()
    _update_dict(newd_batch, X, code)
    # online update with precomputed sufficient statistics A and B
    A = np.dot(code.T, code)
    B = np.dot(X.T, code)
    newd_online = dictionary.copy()
    _update_dict(newd_online, X, code, A, B)
    assert_allclose(newd_batch, newd_online)
@pytest.mark.parametrize(
    "algorithm", ("lasso_lars", "lasso_cd", "lars", "threshold", "omp")
)
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
# Note: do not check integer input because `lasso_lars` and `lars` fail with
# `ValueError` in `_lars_path_solver`
def test_sparse_encode_dtype_match(data_type, algorithm):
    """sparse_encode preserves the floating dtype of its inputs."""
    n_components = 6
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)
    code = sparse_encode(
        X.astype(data_type), dictionary.astype(data_type), algorithm=algorithm
    )
    assert code.dtype == data_type
@pytest.mark.parametrize(
    "algorithm", ("lasso_lars", "lasso_cd", "lars", "threshold", "omp")
)
def test_sparse_encode_numerical_consistency(algorithm):
    """float32 and float64 codes agree to a loose relative tolerance."""
    # verify numerical consistency among np.float32 and np.float64
    rtol = 1e-4
    n_components = 6
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)
    code_32 = sparse_encode(
        X.astype(np.float32), dictionary.astype(np.float32), algorithm=algorithm
    )
    code_64 = sparse_encode(
        X.astype(np.float64), dictionary.astype(np.float64), algorithm=algorithm
    )
    assert_allclose(code_32, code_64, rtol=rtol)
@pytest.mark.parametrize(
    "transform_algorithm", ("lasso_lars", "lasso_cd", "lars", "threshold", "omp")
)
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
# Note: do not check integer input because `lasso_lars` and `lars` fail with
# `ValueError` in `_lars_path_solver`
def test_sparse_coder_dtype_match(data_type, transform_algorithm):
    """SparseCoder.transform preserves the floating dtype of its inputs."""
    # Verify preserving dtype for transform in sparse coder
    n_components = 6
    rng = np.random.RandomState(0)
    dictionary = rng.randn(n_components, n_features)
    coder = SparseCoder(
        dictionary.astype(data_type), transform_algorithm=transform_algorithm
    )
    code = coder.transform(X.astype(data_type))
    assert code.dtype == data_type
@pytest.mark.parametrize("fit_algorithm", ("lars", "cd"))
@pytest.mark.parametrize(
    "transform_algorithm", ("lasso_lars", "lasso_cd", "lars", "threshold", "omp")
)
@pytest.mark.parametrize(
    "data_type, expected_type",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        # integer inputs are promoted to float64
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    """DictionaryLearning fit/transform preserve (or promote) the dtype."""
    # Verify preserving dtype for fit and transform in dictionary learning class
    dict_learner = DictionaryLearning(
        n_components=8,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type
@pytest.mark.parametrize("fit_algorithm", ("lars", "cd"))
@pytest.mark.parametrize(
    "transform_algorithm", ("lasso_lars", "lasso_cd", "lars", "threshold", "omp")
)
@pytest.mark.parametrize(
    "data_type, expected_type",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        # integer inputs are promoted to float64
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_minibatch_dictionary_learning_dtype_match(
    data_type,
    expected_type,
    fit_algorithm,
    transform_algorithm,
):
    """MiniBatch learner preserves dtype, including its inner statistics."""
    # Verify preserving dtype for fit and transform in minibatch dictionary learning
    dict_learner = MiniBatchDictionaryLearning(
        n_components=8,
        batch_size=10,
        fit_algorithm=fit_algorithm,
        transform_algorithm=transform_algorithm,
        max_iter=100,
        tol=1e-1,
        random_state=0,
    )
    dict_learner.fit(X.astype(data_type))
    assert dict_learner.components_.dtype == expected_type
    assert dict_learner.transform(X.astype(data_type)).dtype == expected_type
    # the online sufficient statistics must follow the same dtype
    assert dict_learner._A.dtype == expected_type
    assert dict_learner._B.dtype == expected_type
@pytest.mark.parametrize("method", ("lars", "cd"))
@pytest.mark.parametrize(
    "data_type, expected_type",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        # integer inputs are promoted to float64
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_dict_learning_dtype_match(data_type, expected_type, method):
    """dict_learning outputs follow the dtype of the input data."""
    # Verify output matrix dtype
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary, _ = dict_learning(
        X.astype(data_type),
        n_components=n_components,
        alpha=1,
        random_state=rng,
        method=method,
    )
    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type
@pytest.mark.parametrize("method", ("lars", "cd"))
def test_dict_learning_numerical_consistency(method):
    """float32 and float64 dict_learning agree on invariant quantities."""
    # verify numerically consistent among np.float32 and np.float64
    rtol = 1e-6
    n_components = 4
    alpha = 2
    U_64, V_64, _ = dict_learning(
        X.astype(np.float64),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )
    U_32, V_32, _ = dict_learning(
        X.astype(np.float32),
        n_components=n_components,
        alpha=alpha,
        random_state=0,
        method=method,
    )
    # Optimal solution (U*, V*) is not unique.
    # If (U*, V*) is optimal solution, (-U*,-V*) is also optimal,
    # and (column permutated U*, row permutated V*) are also optional
    # as long as holding UV.
    # So here UV, ||U||_1,1 and sum(||V_k||_2^2) are verified
    # instead of comparing directly U and V.
    assert_allclose(np.matmul(U_64, V_64), np.matmul(U_32, V_32), rtol=rtol)
    assert_allclose(np.sum(np.abs(U_64)), np.sum(np.abs(U_32)), rtol=rtol)
    assert_allclose(np.sum(V_64**2), np.sum(V_32**2), rtol=rtol)
    # verify an obtained solution is not degenerate
    assert np.mean(U_64 != 0.0) > 0.05
    assert np.count_nonzero(U_64 != 0.0) == np.count_nonzero(U_32 != 0.0)
@pytest.mark.parametrize("method", ("lars", "cd"))
@pytest.mark.parametrize(
    "data_type, expected_type",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        # integer inputs are promoted to float64
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_dict_learning_online_dtype_match(data_type, expected_type, method):
    """dict_learning_online outputs follow the dtype of the input data."""
    # Verify output matrix dtype
    rng = np.random.RandomState(0)
    n_components = 8
    code, dictionary = dict_learning_online(
        X.astype(data_type),
        n_components=n_components,
        alpha=1,
        batch_size=10,
        random_state=rng,
        method=method,
    )
    assert code.dtype == expected_type
    assert dictionary.dtype == expected_type
@pytest.mark.parametrize("method", ("lars", "cd"))
def test_dict_learning_online_numerical_consistency(method):
    """float32 and float64 online learning agree on invariant quantities."""
    # verify numerically consistent among np.float32 and np.float64
    rtol = 1e-4
    n_components = 4
    alpha = 1
    U_64, V_64 = dict_learning_online(
        X.astype(np.float64),
        n_components=n_components,
        max_iter=1_000,
        alpha=alpha,
        batch_size=10,
        random_state=0,
        method=method,
        tol=0.0,
        max_no_improvement=None,
    )
    U_32, V_32 = dict_learning_online(
        X.astype(np.float32),
        n_components=n_components,
        max_iter=1_000,
        alpha=alpha,
        batch_size=10,
        random_state=0,
        method=method,
        tol=0.0,
        max_no_improvement=None,
    )
    # Optimal solution (U*, V*) is not unique.
    # If (U*, V*) is optimal solution, (-U*,-V*) is also optimal,
    # and (column permutated U*, row permutated V*) are also optional
    # as long as holding UV.
    # So here UV, ||U||_1,1 and sum(||V_k||_2) are verified
    # instead of comparing directly U and V.
    assert_allclose(np.matmul(U_64, V_64), np.matmul(U_32, V_32), rtol=rtol)
    assert_allclose(np.sum(np.abs(U_64)), np.sum(np.abs(U_32)), rtol=rtol)
    assert_allclose(np.sum(V_64**2), np.sum(V_32**2), rtol=rtol)
    # verify an obtained solution is not degenerate
    assert np.mean(U_64 != 0.0) > 0.05
    assert np.count_nonzero(U_64 != 0.0) == np.count_nonzero(U_32 != 0.0)
@pytest.mark.parametrize(
    "estimator",
    [
        SparseCoder(X.T),
        DictionaryLearning(),
        MiniBatchDictionaryLearning(batch_size=4, max_iter=10),
    ],
)
def test_get_feature_names_out(estimator):
    """Check feature names for dict learning estimators."""
    estimator.fit(X)
    # each estimator above yields X.shape[1] components by construction
    n_components = X.shape[1]
    feature_names_out = estimator.get_feature_names_out()
    estimator_name = estimator.__class__.__name__.lower()
    assert_array_equal(
        feature_names_out,
        [f"{estimator_name}{i}" for i in range(n_components)],
    )
def test_cd_work_on_joblib_memmapped_data(monkeypatch):
    """cd fitting must succeed when joblib memmaps arrays between workers."""
    # force joblib to memmap even tiny arrays
    monkeypatch.setattr(
        sklearn.decomposition._dict_learning,
        "Parallel",
        partial(Parallel, max_nbytes=100),
    )
    rng = np.random.RandomState(0)
    X_train = rng.randn(10, 10)
    dict_learner = DictionaryLearning(
        n_components=5,
        random_state=0,
        n_jobs=2,
        fit_algorithm="cd",
        max_iter=50,
        verbose=True,
    )
    # This must run and complete without error.
    dict_learner.fit(X_train)

View File

@@ -0,0 +1,109 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from itertools import combinations
import numpy as np
import pytest
from sklearn.decomposition import FactorAnalysis
from sklearn.decomposition._factor_analysis import _ortho_rotation
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal
def test_factor_analysis(global_random_seed):
    """Check FactorAnalysis's ability to recover the data covariance structure.

    Fits with both SVD solvers and checks: transform shape, score/score_samples
    consistency, monotone log-likelihood, covariance reconstruction, input
    validation, solver agreement, convergence warning, covariance/precision
    consistency, and component rotations.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features, n_components = 20, 5, 3
    # Some random settings for the generative model
    W = rng.randn(n_components, n_features)
    # latent variable of dim 3, 20 of it
    h = rng.randn(n_samples, n_components)
    # using gamma to model different noise variance
    # per component
    noise = rng.gamma(1, size=n_features) * rng.randn(n_samples, n_features)
    # generate observations
    # wlog, mean is 0
    X = np.dot(h, W) + noise
    fas = []
    for method in ["randomized", "lapack"]:
        fa = FactorAnalysis(n_components=n_components, svd_method=method)
        fa.fit(X)
        fas.append(fa)
        X_t = fa.transform(X)
        assert X_t.shape == (n_samples, n_components)
        assert_almost_equal(fa.loglike_[-1], fa.score_samples(X).sum())
        assert_almost_equal(fa.score_samples(X).mean(), fa.score(X))
        # EM must increase the log-likelihood at every recorded step.
        # BUGFIX: the previous `diff = np.all(np.diff(fa.loglike_));
        # assert diff > 0.0` only checked that consecutive values were not
        # exactly equal (a bool compared to 0.0), never that they increased.
        assert np.all(np.diff(fa.loglike_) > 0.0), "Log likelihood did not increase"
        # Sample Covariance (booleans instead of the former float 0.0/1.0)
        scov = np.cov(X, rowvar=False, bias=True)
        # Model Covariance
        mcov = fa.get_covariance()
        diff = np.sum(np.abs(scov - mcov)) / W.size
        assert diff < 0.2, "Mean absolute difference is %f" % diff
    # mismatched noise_variance_init length must be rejected
    fa = FactorAnalysis(
        n_components=n_components, noise_variance_init=np.ones(n_features)
    )
    with pytest.raises(ValueError):
        fa.fit(X[:, :2])

    def f(x, y):
        return np.abs(getattr(x, y))  # sign will not be equal

    # both SVD solvers must agree up to sign
    fa1, fa2 = fas
    for attr in ["loglike_", "components_", "noise_variance_"]:
        assert_almost_equal(f(fa1, attr), f(fa2, attr))
    fa1.max_iter = 1
    fa1.verbose = True
    with pytest.warns(ConvergenceWarning):
        fa1.fit(X)
    # Test get_covariance and get_precision with n_components == n_features
    # with n_components < n_features and with n_components == 0
    for n_components in [0, 2, X.shape[1]]:
        fa.n_components = n_components
        fa.fit(X)
        cov = fa.get_covariance()
        precision = fa.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
    # test rotation: rotated solutions differ but span the same covariance
    n_components = 2
    results, projections = {}, {}
    for method in (None, "varimax", "quartimax"):
        fa_var = FactorAnalysis(n_components=n_components, rotation=method)
        results[method] = fa_var.fit_transform(X)
        projections[method] = fa_var.get_covariance()
    for rot1, rot2 in combinations([None, "varimax", "quartimax"], 2):
        assert not np.allclose(results[rot1], results[rot2])
        assert np.allclose(projections[rot1], projections[rot2], atol=3)
    # test against R's psych::principal with rotate="varimax"
    # (i.e., the values below stem from rotating the components in R)
    # R's factor analysis returns quite different values; therefore, we only
    # test the rotation itself
    factors = np.array(
        [
            [0.89421016, -0.35854928, -0.27770122, 0.03773647],
            [-0.45081822, -0.89132754, 0.0932195, -0.01787973],
            [0.99500666, -0.02031465, 0.05426497, -0.11539407],
            [0.96822861, -0.06299656, 0.24411001, 0.07540887],
        ]
    )
    r_solution = np.array(
        [[0.962, 0.052], [-0.141, 0.989], [0.949, -0.300], [0.937, -0.251]]
    )
    rotated = _ortho_rotation(factors[:, :n_components], method="varimax").T
    assert_array_almost_equal(np.abs(rotated), np.abs(r_solution), decimal=3)

View File

@@ -0,0 +1,457 @@
"""
Test the fastica algorithm.
"""
import itertools
import os
import warnings
import numpy as np
import pytest
from scipy import stats
from sklearn.decomposition import PCA, FastICA, fastica
from sklearn.decomposition._fastica import _gs_decorrelation
from sklearn.exceptions import ConvergenceWarning
from sklearn.utils._testing import assert_allclose, ignore_warnings
def center_and_norm(x, axis=-1):
    """Center and scale *x* to zero mean and unit std **in place**.

    Parameters
    -----------
    x: ndarray
        Array with an axis of observations (statistical units) measured on
        random variables.
    axis: int, optional
        Axis along which the mean and variance are calculated.
    """
    # Rolling the reduction axis to the front gives a view, so the in-place
    # updates below mutate the caller's array.
    rolled = np.rollaxis(x, axis)
    rolled -= rolled.mean(axis=0)
    rolled /= rolled.std(axis=0)
def test_gs(global_random_seed):
    """Check Gram-Schmidt decorrelation against an orthogonal matrix."""
    # Test gram schmidt orthonormalization
    # generate a random orthogonal matrix
    rng = np.random.RandomState(global_random_seed)
    W, _, _ = np.linalg.svd(rng.randn(10, 10))
    w = rng.randn(10)
    # decorrelating against all 10 rows leaves (numerically) nothing
    _gs_decorrelation(w, W, 10)
    assert (w**2).sum() < 1.0e-10
    w = rng.randn(10)
    # decorrelating against the first 5 rows only zeroes those projections
    u = _gs_decorrelation(w, W, 5)
    tmp = np.dot(u, W.T)
    assert (tmp[:5] ** 2).sum() < 1.0e-10
def test_fastica_attributes_dtypes(global_dtype):
    """Fitted FastICA attributes keep the floating dtype of the input."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(global_dtype, copy=False)
    fica = FastICA(
        n_components=5, max_iter=1000, whiten="unit-variance", random_state=0
    ).fit(X)
    assert fica.components_.dtype == global_dtype
    assert fica.mixing_.dtype == global_dtype
    assert fica.mean_.dtype == global_dtype
    assert fica.whitening_.dtype == global_dtype
def test_fastica_return_dtypes(global_dtype):
    """The fastica function returns arrays with the input floating dtype."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((100, 10)).astype(global_dtype, copy=False)
    k_, mixing_, s_ = fastica(
        X, max_iter=1000, whiten="unit-variance", random_state=rng
    )
    assert k_.dtype == global_dtype
    assert mixing_.dtype == global_dtype
    assert s_.dtype == global_dtype
@pytest.mark.parametrize("add_noise", [True, False])
def test_fastica_simple(add_noise, global_random_seed, global_dtype):
    """Check FastICA separates a 2-source mixture for all solver variants."""
    if (
        global_random_seed == 20
        and global_dtype == np.float32
        and not add_noise
        and os.getenv("DISTRIB") == "ubuntu"
    ):
        pytest.xfail(
            "FastICA instability with Ubuntu Atlas build with float32 "
            "global_dtype. For more details, see "
            "https://github.com/scikit-learn/scikit-learn/issues/24131#issuecomment-1208091119"
        )
    # Test the FastICA algorithm on very simple data.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 1000
    # Generate two sources: a square wave and a heavy-tailed t sample
    s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
    s2 = stats.t.rvs(1, size=n_samples, random_state=global_random_seed)
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s = s.astype(global_dtype)
    s1, s2 = s
    # Mixing angle
    phi = 0.6
    mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]])
    mixing = mixing.astype(global_dtype)
    m = np.dot(mixing, s)
    if add_noise:
        m += 0.1 * rng.randn(2, 1000)
    center_and_norm(m)

    # function as fun arg
    def g_test(x):
        return x**3, (3 * x**2).mean(axis=-1)

    algos = ["parallel", "deflation"]
    nls = ["logcosh", "exp", "cube", g_test]
    whitening = ["arbitrary-variance", "unit-variance", False]
    for algo, nl, whiten in itertools.product(algos, nls, whitening):
        if whiten:
            k_, mixing_, s_ = fastica(
                m.T, fun=nl, whiten=whiten, algorithm=algo, random_state=rng
            )
            # arbitrary callables such as np.tanh must be rejected
            with pytest.raises(ValueError):
                fastica(m.T, fun=np.tanh, whiten=whiten, algorithm=algo)
        else:
            # with whiten=False the data is pre-whitened by PCA
            pca = PCA(n_components=2, whiten=True, random_state=rng)
            X = pca.fit_transform(m.T)
            k_, mixing_, s_ = fastica(
                X, fun=nl, algorithm=algo, whiten=False, random_state=rng
            )
            with pytest.raises(ValueError):
                fastica(X, fun=np.tanh, algorithm=algo)
        s_ = s_.T
        # Check that the mixing model described in the docstring holds:
        if whiten:
            # XXX: exact reconstruction to standard relative tolerance is not
            # possible. This is probably expected when add_noise is True but we
            # also need a non-trivial atol in float32 when add_noise is False.
            #
            # Note that the 2 sources are non-Gaussian in this test.
            atol = 1e-5 if global_dtype == np.float32 else 0
            assert_allclose(np.dot(np.dot(mixing_, k_), m), s_, atol=atol)
        center_and_norm(s_)
        s1_, s2_ = s_
        # Check to see if the sources have been estimated
        # in the wrong order
        if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
            s2_, s1_ = s_
        # fix the sign indeterminacy of ICA
        s1_ *= np.sign(np.dot(s1_, s1))
        s2_ *= np.sign(np.dot(s2_, s2))
        # Check that we have estimated the original sources
        if not add_noise:
            assert_allclose(np.dot(s1_, s1) / n_samples, 1, atol=1e-2)
            assert_allclose(np.dot(s2_, s2) / n_samples, 1, atol=1e-2)
        else:
            assert_allclose(np.dot(s1_, s1) / n_samples, 1, atol=1e-1)
            assert_allclose(np.dot(s2_, s2) / n_samples, 1, atol=1e-1)
        # Test FastICA class
        _, _, sources_fun = fastica(
            m.T, fun=nl, algorithm=algo, random_state=global_random_seed
        )
        ica = FastICA(fun=nl, algorithm=algo, random_state=global_random_seed)
        sources = ica.fit_transform(m.T)
        assert ica.components_.shape == (2, 2)
        assert sources.shape == (1000, 2)
        # the function and estimator APIs must agree
        assert_allclose(sources_fun, sources)
        # Set atol to account for the different magnitudes of the elements in sources
        # (from 1e-4 to 1e1).
        atol = np.max(np.abs(sources)) * (1e-5 if global_dtype == np.float32 else 1e-7)
        assert_allclose(sources, ica.transform(m.T), atol=atol)
        assert ica.mixing_.shape == (2, 2)
        ica = FastICA(fun=np.tanh, algorithm=algo)
        with pytest.raises(ValueError):
            ica.fit(m.T)
def test_fastica_nowhiten():
    """n_components is ignored (with a warning) when whiten=False; issue #697."""
    data = [[0, 1], [1, 0]]
    # test for issue #697
    estimator = FastICA(n_components=1, whiten=False, random_state=0)
    with pytest.warns(UserWarning, match="Ignoring n_components with whiten=False."):
        estimator.fit(data)
    assert hasattr(estimator, "mixing_")
def test_fastica_convergence_fail(global_random_seed):
    """A too-tight tolerance with few iterations raises ConvergenceWarning."""
    # Test the FastICA algorithm on very simple data
    # (see test_non_square_fastica).
    # Ensure a ConvergenceWarning raised if the tolerance is sufficiently low.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 1000
    # Generate two sources:
    t = np.linspace(0, 100, n_samples)
    s1 = np.sin(t)
    s2 = np.ceil(np.sin(np.pi * t))
    s = np.c_[s1, s2].T
    center_and_norm(s)
    # Mixing matrix
    mixing = rng.randn(6, 2)
    m = np.dot(mixing, s)
    # Do fastICA with tolerance 0. to ensure failing convergence
    warn_msg = (
        "FastICA did not converge. Consider increasing tolerance "
        "or the maximum number of iterations."
    )
    with pytest.warns(ConvergenceWarning, match=warn_msg):
        ica = FastICA(
            algorithm="parallel", n_components=2, random_state=rng, max_iter=2, tol=0.0
        )
        ica.fit(m.T)
@pytest.mark.parametrize("add_noise", [True, False])
def test_non_square_fastica(global_random_seed, add_noise):
    """Check FastICA recovers 2 sources from a 6-channel mixture."""
    # Test the FastICA algorithm on very simple data.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 1000
    # Generate two sources:
    t = np.linspace(0, 100, n_samples)
    s1 = np.sin(t)
    s2 = np.ceil(np.sin(np.pi * t))
    s = np.c_[s1, s2].T
    center_and_norm(s)
    s1, s2 = s
    # Mixing matrix
    mixing = rng.randn(6, 2)
    m = np.dot(mixing, s)
    if add_noise:
        m += 0.1 * rng.randn(6, n_samples)
    center_and_norm(m)
    k_, mixing_, s_ = fastica(
        m.T, n_components=2, whiten="unit-variance", random_state=rng
    )
    s_ = s_.T
    # Check that the mixing model described in the docstring holds:
    assert_allclose(s_, np.dot(np.dot(mixing_, k_), m))
    center_and_norm(s_)
    s1_, s2_ = s_
    # Check to see if the sources have been estimated
    # in the wrong order
    if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)):
        s2_, s1_ = s_
    # fix the sign indeterminacy of ICA
    s1_ *= np.sign(np.dot(s1_, s1))
    s2_ *= np.sign(np.dot(s2_, s2))
    # Check that we have estimated the original sources
    if not add_noise:
        assert_allclose(np.dot(s1_, s1) / n_samples, 1, atol=1e-3)
        assert_allclose(np.dot(s2_, s2) / n_samples, 1, atol=1e-3)
def test_fit_transform(global_random_seed, global_dtype):
    """Test unit variance of transformed data using FastICA algorithm.
    Check that `fit_transform` gives the same result as applying
    `fit` and then `transform`.
    Bug #13056
    """
    # multivariate uniform data in [0, 1]
    rng = np.random.RandomState(global_random_seed)
    X = rng.random_sample((100, 10)).astype(global_dtype)
    max_iter = 300
    for whiten, n_components in [["unit-variance", 5], [False, None]]:
        # n_components is ignored when whiten=False -> full dimensionality
        n_components_ = n_components if n_components is not None else X.shape[1]
        ica = FastICA(
            n_components=n_components, max_iter=max_iter, whiten=whiten, random_state=0
        )
        with warnings.catch_warnings():
            # make sure that numerical errors do not cause sqrt of negative
            # values
            warnings.simplefilter("error", RuntimeWarning)
            # XXX: for some seeds, the model does not converge.
            # However this is not what we test here.
            warnings.simplefilter("ignore", ConvergenceWarning)
            Xt = ica.fit_transform(X)
        assert ica.components_.shape == (n_components_, 10)
        assert Xt.shape == (X.shape[0], n_components_)
        # A second, identically-configured estimator fit with fit + transform
        # must produce the same embedding as fit_transform above.
        ica2 = FastICA(
            n_components=n_components, max_iter=max_iter, whiten=whiten, random_state=0
        )
        with warnings.catch_warnings():
            # make sure that numerical errors do not cause sqrt of negative
            # values
            warnings.simplefilter("error", RuntimeWarning)
            warnings.simplefilter("ignore", ConvergenceWarning)
            ica2.fit(X)
        assert ica2.components_.shape == (n_components_, 10)
        Xt2 = ica2.transform(X)
        # XXX: we have to set atol for this test to pass for all seeds when
        # fitting with float32 data. Is this revealing a bug?
        # NOTE(review): `global_dtype` is a dtype object and therefore always
        # truthy, so this branch also runs for float64; the intent looks like
        # `global_dtype == np.float32` -- confirm before changing.
        if global_dtype:
            atol = np.abs(Xt2).mean() / 1e6
        else:
            atol = 0.0  # the default rtol is enough for float64 data
        assert_allclose(Xt, Xt2, atol=atol)
@pytest.mark.filterwarnings("ignore:Ignoring n_components with whiten=False.")
@pytest.mark.parametrize(
"whiten, n_components, expected_mixing_shape",
[
("arbitrary-variance", 5, (10, 5)),
("arbitrary-variance", 10, (10, 10)),
("unit-variance", 5, (10, 5)),
("unit-variance", 10, (10, 10)),
(False, 5, (10, 10)),
(False, 10, (10, 10)),
],
)
def test_inverse_transform(
whiten, n_components, expected_mixing_shape, global_random_seed, global_dtype
):
# Test FastICA.inverse_transform
n_samples = 100
rng = np.random.RandomState(global_random_seed)
X = rng.random_sample((n_samples, 10)).astype(global_dtype)
ica = FastICA(n_components=n_components, random_state=rng, whiten=whiten)
with warnings.catch_warnings():
# For some dataset (depending on the value of global_dtype) the model
# can fail to converge but this should not impact the definition of
# a valid inverse transform.
warnings.simplefilter("ignore", ConvergenceWarning)
Xt = ica.fit_transform(X)
assert ica.mixing_.shape == expected_mixing_shape
X2 = ica.inverse_transform(Xt)
assert X.shape == X2.shape
# reversibility test in non-reduction case
if n_components == X.shape[1]:
# XXX: we have to set atol for this test to pass for all seeds when
# fitting with float32 data. Is this revealing a bug?
if global_dtype:
# XXX: dividing by a smaller number makes
# tests fail for some seeds.
atol = np.abs(X2).mean() / 1e5
else:
atol = 0.0 # the default rtol is enough for float64 data
assert_allclose(X, X2, atol=atol)
def test_fastica_errors():
    """Check that fastica raises informative ValueErrors on invalid inputs."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 3
    X = rng.random_sample((n_samples, n_features))
    # Deliberately mis-shaped unmixing initialization (4x4 instead of 3x3).
    w_init = rng.randn(n_features + 1, n_features + 1)
    # alpha outside the supported [1, 2] interval must be rejected.
    with pytest.raises(ValueError, match=r"alpha must be in \[1,2\]"):
        fastica(X, fun_args={"alpha": 0})
    # A w_init whose shape does not match n_components must be rejected.
    expected = r"w_init has invalid shape.+should be \(3L?, 3L?\)"
    with pytest.raises(ValueError, match=expected):
        fastica(X, w_init=w_init)
def test_fastica_whiten_unit_variance(global_random_seed):
    """Check that whiten="unit-variance" yields unit-variance sources.

    Non-regression test for bug #13056.
    """
    rng = np.random.RandomState(global_random_seed)
    X = rng.random_sample((100, 10))
    model = FastICA(
        n_components=X.shape[1], whiten="unit-variance", random_state=0
    )
    sources = model.fit_transform(X)
    # The recovered sources must have overall unit variance.
    assert np.var(sources) == pytest.approx(1.0)
@pytest.mark.parametrize("whiten", ["arbitrary-variance", "unit-variance", False])
@pytest.mark.parametrize("return_X_mean", [True, False])
@pytest.mark.parametrize("return_n_iter", [True, False])
def test_fastica_output_shape(whiten, return_X_mean, return_n_iter):
n_features = 3
n_samples = 10
rng = np.random.RandomState(0)
X = rng.random_sample((n_samples, n_features))
expected_len = 3 + return_X_mean + return_n_iter
out = fastica(
X, whiten=whiten, return_n_iter=return_n_iter, return_X_mean=return_X_mean
)
assert len(out) == expected_len
if not whiten:
assert out[0] is None
@pytest.mark.parametrize("add_noise", [True, False])
def test_fastica_simple_different_solvers(add_noise, global_random_seed):
"""Test FastICA is consistent between whiten_solvers."""
rng = np.random.RandomState(global_random_seed)
n_samples = 1000
# Generate two sources:
s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1
s2 = stats.t.rvs(1, size=n_samples, random_state=rng)
s = np.c_[s1, s2].T
center_and_norm(s)
s1, s2 = s
# Mixing angle
phi = rng.rand() * 2 * np.pi
mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]])
m = np.dot(mixing, s)
if add_noise:
m += 0.1 * rng.randn(2, 1000)
center_and_norm(m)
outs = {}
for solver in ("svd", "eigh"):
ica = FastICA(random_state=0, whiten="unit-variance", whiten_solver=solver)
sources = ica.fit_transform(m.T)
outs[solver] = sources
assert ica.components_.shape == (2, 2)
assert sources.shape == (1000, 2)
# compared numbers are not all on the same magnitude. Using a small atol to
# make the test less brittle
assert_allclose(outs["eigh"], outs["svd"], atol=1e-12)
def test_fastica_eigh_low_rank_warning(global_random_seed):
    """Test FastICA eigh solver raises warning for low-rank data."""
    rng = np.random.RandomState(global_random_seed)
    A = rng.randn(10, 2)
    # X is a 10x10 Gram matrix of rank 2, i.e. strongly rank-deficient.
    X = A @ A.T
    ica = FastICA(random_state=0, whiten="unit-variance", whiten_solver="eigh")
    msg = "There are some small singular values"
    with pytest.warns(UserWarning, match=msg):
        with ignore_warnings(category=ConvergenceWarning):
            # The FastICA solver may not converge for some data with specific
            # random seeds but this happens after the whiten step so this is
            # not what we want to test here.
            ica.fit(X)

View File

@@ -0,0 +1,487 @@
"""Tests for Incremental PCA."""
import itertools
import warnings
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA
from sklearn.utils._testing import (
assert_allclose_dense_sparse,
assert_almost_equal,
assert_array_almost_equal,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS, LIL_CONTAINERS
# Module-level dataset shared by the IncrementalPCA tests below.
iris = datasets.load_iris()
def test_incremental_pca():
    # Incremental PCA on dense arrays.
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_transformed = ipca.fit_transform(X)
    assert X_transformed.shape == (X.shape[0], 2)
    # The total explained variance should closely match full-batch PCA.
    np.testing.assert_allclose(
        ipca.explained_variance_ratio_.sum(),
        pca.explained_variance_ratio_.sum(),
        rtol=1e-3,
    )
    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        # The precision matrix must be the inverse of the covariance matrix.
        np.testing.assert_allclose(
            np.dot(cov, precision), np.eye(X.shape[1]), atol=1e-13
        )
@pytest.mark.parametrize(
    "sparse_container", CSC_CONTAINERS + CSR_CONTAINERS + LIL_CONTAINERS
)
def test_incremental_pca_sparse(sparse_container):
    # Incremental PCA on sparse arrays.
    X = iris.data
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_sparse = sparse_container(X)
    batch_size = X_sparse.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    X_transformed = ipca.fit_transform(X_sparse)
    assert X_transformed.shape == (X_sparse.shape[0], 2)
    # The total explained variance should closely match full-batch PCA.
    np.testing.assert_allclose(
        ipca.explained_variance_ratio_.sum(),
        pca.explained_variance_ratio_.sum(),
        rtol=1e-3,
    )
    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X_sparse)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(
            np.dot(cov, precision), np.eye(X_sparse.shape[1]), atol=1e-13
        )
    # Only fit accepts sparse input (densifying batch by batch); partial_fit
    # must reject it with an explicit TypeError.
    with pytest.raises(
        TypeError,
        match=(
            "IncrementalPCA.partial_fit does not support "
            "sparse input. Either convert data to dense "
            "or use IncrementalPCA.fit to do so in batches."
        ),
    ):
        ipca.partial_fit(X_sparse)
def test_incremental_pca_check_projection(global_random_seed):
    # Test that the projection of data is correct.
    rng = np.random.RandomState(global_random_seed)
    n, p = 100, 3
    X = rng.randn(n, p) * 0.1
    # Shift the first 10 samples to create a dominant direction along [3,4,5].
    X[:10] += np.array([3, 4, 5])
    Xt = 0.1 * rng.randn(1, p) + np.array([3, 4, 5])
    # Get the reconstruction of the generated data X
    # Note that Xt has the same "components" as X, just separated
    # This is what we want to ensure is recreated correctly
    Yt = IncrementalPCA(n_components=2).fit(X).transform(Xt)
    # Normalize
    Yt /= np.sqrt((Yt**2).sum())
    # Make sure that the first element of Yt is ~1, this means
    # the reconstruction worked as expected
    assert_almost_equal(np.abs(Yt[0][0]), 1.0, 1)
def test_incremental_pca_inverse(global_random_seed):
    # Test that the projection of data can be inverted.
    rng = np.random.RandomState(global_random_seed)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean
    # same check that we can find the original data from the transformed
    # signal (since the data is almost of rank n_components)
    ipca = IncrementalPCA(n_components=2, batch_size=10).fit(X)
    Y = ipca.transform(X)
    Y_inverse = ipca.inverse_transform(Y)
    # Round trip through the 2 dominant components recovers X closely.
    assert_almost_equal(X, Y_inverse, decimal=3)
def test_incremental_pca_validation():
    # Test that n_components is <= n_features.
    X = np.array([[0, 1, 0], [1, 0, 0]])
    n_samples, n_features = X.shape
    n_components = 4
    # n_components larger than n_features must be rejected.
    with pytest.raises(
        ValueError,
        match=(
            "n_components={} invalid"
            " for n_features={}, need more rows than"
            " columns for IncrementalPCA"
            " processing".format(n_components, n_features)
        ),
    ):
        IncrementalPCA(n_components, batch_size=10).fit(X)
    # Test that n_components is also <= n_samples in first call to partial fit.
    n_components = 3
    with pytest.raises(
        ValueError,
        match=(
            f"n_components={n_components} must be less or equal to the batch "
            f"number of samples {n_samples} for the first partial_fit call."
        ),
    ):
        IncrementalPCA(n_components=n_components).partial_fit(X)
def test_n_samples_equal_n_components():
    # Ensures no warning is raised when n_samples==n_components
    # Non-regression test for gh-19050
    ipca = IncrementalPCA(n_components=5)
    # Elevate RuntimeWarning to an error so any division/sqrt warning fails
    # the test for both partial_fit and fit.
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        ipca.partial_fit(np.random.randn(5, 7))
    with warnings.catch_warnings():
        warnings.simplefilter("error", RuntimeWarning)
        ipca.fit(np.random.randn(5, 7))
def test_n_components_none():
    # Ensures that n_components == None is handled correctly
    rng = np.random.RandomState(1999)
    # Cover both the tall (n_samples > n_features) and wide case.
    for n_samples, n_features in [(50, 10), (10, 50)]:
        X = rng.rand(n_samples, n_features)
        ipca = IncrementalPCA(n_components=None)
        # First partial_fit call, ipca.n_components_ is inferred from
        # min(X.shape)
        ipca.partial_fit(X)
        assert ipca.n_components_ == min(X.shape)
        # Second partial_fit call, ipca.n_components_ is inferred from
        # ipca.components_ computed from the first partial_fit call
        ipca.partial_fit(X)
        assert ipca.n_components_ == ipca.components_.shape[0]
def test_incremental_pca_set_params():
    # Test that changing n_components between partial_fit calls via
    # set_params raises an error, and that restoring it works again.
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    X = rng.randn(n_samples, n_features)
    X2 = rng.randn(n_samples, n_features)
    X3 = rng.randn(n_samples, n_features)
    ipca = IncrementalPCA(n_components=20)
    ipca.fit(X)
    # Decreasing number of components
    ipca.set_params(n_components=10)
    with pytest.raises(ValueError):
        ipca.partial_fit(X2)
    # Increasing number of components
    ipca.set_params(n_components=15)
    with pytest.raises(ValueError):
        ipca.partial_fit(X3)
    # Returning to original setting
    ipca.set_params(n_components=20)
    ipca.partial_fit(X)
def test_incremental_pca_num_features_change():
    # Test that changing the number of features between calls raises an error.
    rng = np.random.RandomState(1999)
    n_samples = 100
    X = rng.randn(n_samples, 20)
    # Second batch has a different feature count (50 vs 20).
    X2 = rng.randn(n_samples, 50)
    ipca = IncrementalPCA(n_components=None)
    ipca.fit(X)
    with pytest.raises(ValueError):
        ipca.partial_fit(X2)
def test_incremental_pca_batch_signs(global_random_seed):
    # Test that components_ sign is stable over batch sizes.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 100
    n_features = 3
    X = rng.randn(n_samples, n_features)
    all_components = []
    batch_sizes = np.arange(10, 20)
    for batch_size in batch_sizes:
        ipca = IncrementalPCA(n_components=None, batch_size=batch_size).fit(X)
        all_components.append(ipca.components_)
    # Compare every consecutive pair of fits: signs must not flip.
    for i, j in itertools.pairwise(all_components):
        assert_almost_equal(np.sign(i), np.sign(j), decimal=6)
def test_incremental_pca_partial_fit_small_batch():
    # Test that there is no minimum batch size after the first partial_fit
    # Non-regression test
    rng = np.random.RandomState(1999)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean
    n_components = p
    pipca = IncrementalPCA(n_components=n_components)
    # First batch must contain at least n_components samples ...
    pipca.partial_fit(X[:n_components])
    # ... but subsequent batches may be as small as a single sample.
    for idx in range(n_components, n):
        pipca.partial_fit(X[idx : idx + 1])
    pca = PCA(n_components=n_components)
    pca.fit(X)
    assert_allclose(pca.components_, pipca.components_, atol=1e-3)
def test_incremental_pca_batch_values(global_random_seed):
    # Test that components_ values are stable over batch sizes.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 100
    n_features = 3
    X = rng.randn(n_samples, n_features)
    all_components = []
    batch_sizes = np.arange(20, 40, 3)
    for batch_size in batch_sizes:
        ipca = IncrementalPCA(n_components=None, batch_size=batch_size).fit(X)
        all_components.append(ipca.components_)
    # Loose tolerance (1 decimal): values should be close, not identical.
    for i, j in itertools.pairwise(all_components):
        assert_almost_equal(i, j, decimal=1)
def test_incremental_pca_batch_rank():
    # Test sample size in each batch is always larger or equal to n_components
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    X = rng.randn(n_samples, n_features)
    all_components = []
    # batch sizes span from exactly n_components up to near n_samples
    batch_sizes = np.arange(20, 90, 3)
    for batch_size in batch_sizes:
        ipca = IncrementalPCA(n_components=20, batch_size=batch_size).fit(X)
        all_components.append(ipca.components_)
    for components_i, components_j in itertools.pairwise(all_components):
        assert_allclose_dense_sparse(components_i, components_j)
def test_incremental_pca_partial_fit(global_random_seed):
    # Test that fit and partial_fit get equivalent results.
    rng = np.random.RandomState(global_random_seed)
    n, p = 50, 3
    X = rng.randn(n, p)  # spherical data
    X[:, 1] *= 0.00001  # make middle component relatively small
    X += [5, 4, 3]  # make a large mean
    # same check that we can find the original data from the transformed
    # signal (since the data is almost of rank n_components)
    batch_size = 10
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size).fit(X)
    pipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    # Add one to make sure endpoint is included
    batch_itr = np.arange(0, n + 1, batch_size)
    # Feed partial_fit exactly the batches that fit would have used.
    for i, j in itertools.pairwise(batch_itr):
        pipca.partial_fit(X[i:j, :])
    assert_almost_equal(ipca.components_, pipca.components_, decimal=3)
def test_incremental_pca_against_pca_iris():
    """IncrementalPCA and PCA agree on iris up to a per-component sign flip."""
    data = iris.data
    embedding_full = PCA(n_components=2).fit_transform(data)
    embedding_incremental = IncrementalPCA(
        n_components=2, batch_size=25
    ).fit_transform(data)
    # Compare absolute values to ignore the sign indeterminacy of PCA.
    assert_almost_equal(np.abs(embedding_full), np.abs(embedding_incremental), 1)
def test_incremental_pca_against_pca_random_data(global_random_seed):
    # Test that IncrementalPCA and PCA are approximate (to a sign flip).
    rng = np.random.RandomState(global_random_seed)
    n_samples = 100
    n_features = 3
    # Gaussian cloud with a random per-feature offset.
    X = rng.randn(n_samples, n_features) + 5 * rng.rand(1, n_features)
    Y_pca = PCA(n_components=3).fit_transform(X)
    Y_ipca = IncrementalPCA(n_components=3, batch_size=25).fit_transform(X)
    assert_almost_equal(np.abs(Y_pca), np.abs(Y_ipca), 1)
def test_explained_variances():
    # Test that PCA and IncrementalPCA calculations match
    X = datasets.make_low_rank_matrix(
        1000, 100, tail_strength=0.0, effective_rank=10, random_state=1999
    )
    prec = 3
    n_samples, n_features = X.shape
    # nc=None keeps all components; nc=99 drops one.
    for nc in [None, 99]:
        pca = PCA(n_components=nc).fit(X)
        ipca = IncrementalPCA(n_components=nc, batch_size=100).fit(X)
        assert_almost_equal(
            pca.explained_variance_, ipca.explained_variance_, decimal=prec
        )
        assert_almost_equal(
            pca.explained_variance_ratio_, ipca.explained_variance_ratio_, decimal=prec
        )
        assert_almost_equal(pca.noise_variance_, ipca.noise_variance_, decimal=prec)
def test_singular_values(global_random_seed):
    # Check that the IncrementalPCA output has the correct singular values
    rng = np.random.RandomState(global_random_seed)
    n_samples = 1000
    n_features = 100
    X = datasets.make_low_rank_matrix(
        n_samples, n_features, tail_strength=0.0, effective_rank=10, random_state=rng
    )
    pca = PCA(n_components=10, svd_solver="full", random_state=rng).fit(X)
    ipca = IncrementalPCA(n_components=10, batch_size=150).fit(X)
    assert_array_almost_equal(pca.singular_values_, ipca.singular_values_, 2)
    # Compare to the Frobenius norm
    X_pca = pca.transform(X)
    X_ipca = ipca.transform(X)
    assert_array_almost_equal(
        np.sum(pca.singular_values_**2.0), np.linalg.norm(X_pca, "fro") ** 2.0, 12
    )
    assert_array_almost_equal(
        np.sum(ipca.singular_values_**2.0), np.linalg.norm(X_ipca, "fro") ** 2.0, 2
    )
    # Compare to the 2-norms of the score vectors
    assert_array_almost_equal(
        pca.singular_values_, np.sqrt(np.sum(X_pca**2.0, axis=0)), 12
    )
    assert_array_almost_equal(
        ipca.singular_values_, np.sqrt(np.sum(X_ipca**2.0, axis=0)), 2
    )
    # Set the singular values and see what we get back
    # Build a matrix with known singular values (3.142, 2.718, 1.0) by
    # rescaling unit-norm score vectors, then refit on the reconstruction.
    rng = np.random.RandomState(global_random_seed)
    n_samples = 100
    n_features = 110
    X = datasets.make_low_rank_matrix(
        n_samples, n_features, tail_strength=0.0, effective_rank=3, random_state=rng
    )
    pca = PCA(n_components=3, svd_solver="full", random_state=rng)
    ipca = IncrementalPCA(n_components=3, batch_size=100)
    X_pca = pca.fit_transform(X)
    X_pca /= np.sqrt(np.sum(X_pca**2.0, axis=0))
    X_pca[:, 0] *= 3.142
    X_pca[:, 1] *= 2.718
    X_hat = np.dot(X_pca, pca.components_)
    pca.fit(X_hat)
    ipca.fit(X_hat)
    assert_array_almost_equal(pca.singular_values_, [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(ipca.singular_values_, [3.142, 2.718, 1.0], 14)
def test_whitening(global_random_seed):
    # Test that PCA and IncrementalPCA transforms match to sign flip.
    X = datasets.make_low_rank_matrix(
        1000, 10, tail_strength=0.0, effective_rank=2, random_state=global_random_seed
    )
    atol = 1e-3
    for nc in [None, 9]:
        pca = PCA(whiten=True, n_components=nc).fit(X)
        ipca = IncrementalPCA(whiten=True, n_components=nc, batch_size=250).fit(X)
        # Since the data is rank deficient, some components are pure noise. We
        # should not expect those dimensions to carry any signal and their
        # values might be arbitrarily changed by implementation details of the
        # internal SVD solver. We therefore filter them out before comparison.
        stable_mask = pca.explained_variance_ratio_ > 1e-12
        Xt_pca = pca.transform(X)
        Xt_ipca = ipca.transform(X)
        assert_allclose(
            np.abs(Xt_pca)[:, stable_mask],
            np.abs(Xt_ipca)[:, stable_mask],
            atol=atol,
        )
        # The noisy dimensions are in the null space of the inverse transform,
        # so they are not influencing the reconstruction. We therefore don't
        # need to apply the mask here.
        Xinv_ipca = ipca.inverse_transform(Xt_ipca)
        Xinv_pca = pca.inverse_transform(Xt_pca)
        # Both reconstructions recover X and agree with each other.
        assert_allclose(X, Xinv_ipca, atol=atol)
        assert_allclose(X, Xinv_pca, atol=atol)
        assert_allclose(Xinv_pca, Xinv_ipca, atol=atol)
def test_incremental_pca_partial_fit_float_division():
    # Test to ensure float division is used in all versions of Python
    # (non-regression test for issue #9489)
    rng = np.random.RandomState(0)
    A = rng.randn(5, 3) + 2
    B = rng.randn(7, 3) + 5
    pca = IncrementalPCA(n_components=2)
    pca.partial_fit(A)
    # Set n_samples_seen_ to be a floating point number instead of an int
    pca.n_samples_seen_ = float(pca.n_samples_seen_)
    pca.partial_fit(B)
    singular_vals_float_samples_seen = pca.singular_values_
    # Reference run with the default integer n_samples_seen_.
    pca2 = IncrementalPCA(n_components=2)
    pca2.partial_fit(A)
    pca2.partial_fit(B)
    singular_vals_int_samples_seen = pca2.singular_values_
    # Both counters must lead to identical results.
    np.testing.assert_allclose(
        singular_vals_float_samples_seen, singular_vals_int_samples_seen
    )
def test_incremental_pca_fit_overflow_error():
    # Test for overflow error on Windows OS
    # (non-regression test for issue #17693)
    rng = np.random.RandomState(0)
    # Large sample count to trigger the (previously overflowing) counters.
    A = rng.rand(500000, 2)
    ipca = IncrementalPCA(n_components=2, batch_size=10000)
    ipca.fit(A)
    pca = PCA(n_components=2)
    pca.fit(A)
    np.testing.assert_allclose(ipca.singular_values_, pca.singular_values_)
def test_incremental_pca_feature_names_out():
    """Check feature names out for IncrementalPCA."""
    ipca = IncrementalPCA(n_components=2).fit(iris.data)
    # Output names follow the "<lowercase class name><index>" convention.
    expected = [f"incrementalpca{i}" for i in range(2)]
    assert_array_equal(expected, ipca.get_feature_names_out())

View File

@@ -0,0 +1,566 @@
import warnings
import numpy as np
import pytest
import sklearn
from sklearn.datasets import load_iris, make_blobs, make_circles
from sklearn.decomposition import PCA, KernelPCA
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import Perceptron
from sklearn.metrics.pairwise import rbf_kernel
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
from sklearn.utils.validation import _check_psd_eigenvalues
def test_kernel_pca(global_random_seed):
    """Nominal test for all solvers and all known kernels + a custom one
    It tests
    - that fit_transform is equivalent to fit+transform
    - that the shapes of transforms and inverse transforms are correct
    """
    rng = np.random.RandomState(global_random_seed)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))
    def histogram(x, y, **kwargs):
        # Histogram kernel implemented as a callable.
        assert kwargs == {}  # no kernel_params that we didn't ask for
        return np.minimum(x, y).sum()
    for eigen_solver in ("auto", "dense", "arpack", "randomized"):
        for kernel in ("linear", "rbf", "poly", histogram):
            # histogram kernel produces singular matrix inside linalg.solve
            # XXX use a least-squares approximation?
            inv = not callable(kernel)
            # transform fit data
            kpca = KernelPCA(
                4, kernel=kernel, eigen_solver=eigen_solver, fit_inverse_transform=inv
            )
            X_fit_transformed = kpca.fit_transform(X_fit)
            X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
            # abs() because eigenvector signs are arbitrary
            assert_array_almost_equal(
                np.abs(X_fit_transformed), np.abs(X_fit_transformed2)
            )
            # non-regression test: previously, gamma would be 0 by default,
            # forcing all eigenvalues to 0 under the poly kernel
            assert X_fit_transformed.size != 0
            # transform new data
            X_pred_transformed = kpca.transform(X_pred)
            assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]
            # inverse transform
            if inv:
                X_pred2 = kpca.inverse_transform(X_pred_transformed)
                assert X_pred2.shape == X_pred.shape
def test_kernel_pca_invalid_parameters():
    """Check that kPCA raises an error if the parameters are invalid

    Tests fitting inverse transform with a precomputed kernel raises a
    ValueError.
    """
    # fit_inverse_transform needs the original features, which a precomputed
    # kernel does not carry, so this combination must be rejected at fit time.
    estimator = KernelPCA(
        n_components=10, fit_inverse_transform=True, kernel="precomputed"
    )
    with pytest.raises(
        ValueError, match="Cannot fit_inverse_transform with a precomputed kernel"
    ):
        estimator.fit(np.random.randn(10, 10))
def test_kernel_pca_consistent_transform(global_random_seed):
    """Check robustness to mutations in the original training array
    Test that after fitting a kPCA model, it stays independent of any
    mutation of the values of the original data object by relying on an
    internal copy.
    """
    # X_fit_ needs to retain the old, unmodified copy of X
    state = np.random.RandomState(global_random_seed)
    X = state.rand(10, 10)
    kpca = KernelPCA(random_state=state).fit(X)
    transformed1 = kpca.transform(X)
    X_copy = X.copy()
    # Mutate the original training array in place after fitting.
    X[:, 0] = 666
    transformed2 = kpca.transform(X_copy)
    # The model must still transform the (unmutated) data identically.
    assert_array_almost_equal(transformed1, transformed2)
def test_kernel_pca_deterministic_output(global_random_seed):
    """Test that Kernel PCA produces deterministic output
    Tests that the same inputs and random state produce the same output.
    """
    rng = np.random.RandomState(global_random_seed)
    X = rng.rand(10, 10)
    eigen_solver = ("arpack", "dense")
    for solver in eigen_solver:
        transformed_X = np.zeros((20, 2))
        # Fit 20 times; all first-sample embeddings must be identical.
        for i in range(20):
            kpca = KernelPCA(n_components=2, eigen_solver=solver, random_state=rng)
            transformed_X[i, :] = kpca.fit_transform(X)[0]
        assert_allclose(transformed_X, np.tile(transformed_X[0, :], 20).reshape(20, 2))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_kernel_pca_sparse(csr_container, global_random_seed):
"""Test that kPCA works on a sparse data input.
Same test as ``test_kernel_pca except inverse_transform`` since it's not
implemented for sparse matrices.
"""
rng = np.random.RandomState(global_random_seed)
X_fit = csr_container(rng.random_sample((5, 4)))
X_pred = csr_container(rng.random_sample((2, 4)))
for eigen_solver in ("auto", "arpack", "randomized"):
for kernel in ("linear", "rbf", "poly"):
# transform fit data
kpca = KernelPCA(
4,
kernel=kernel,
eigen_solver=eigen_solver,
fit_inverse_transform=False,
random_state=0,
)
X_fit_transformed = kpca.fit_transform(X_fit)
X_fit_transformed2 = kpca.fit(X_fit).transform(X_fit)
assert_array_almost_equal(
np.abs(X_fit_transformed), np.abs(X_fit_transformed2)
)
# transform new data
X_pred_transformed = kpca.transform(X_pred)
assert X_pred_transformed.shape[1] == X_fit_transformed.shape[1]
# inverse transform: not available for sparse matrices
# XXX: should we raise another exception type here? For instance:
# NotImplementedError.
with pytest.raises(NotFittedError):
kpca.inverse_transform(X_pred_transformed)
@pytest.mark.parametrize("solver", ["auto", "dense", "arpack", "randomized"])
@pytest.mark.parametrize("n_features", [4, 10])
def test_kernel_pca_linear_kernel(solver, n_features, global_random_seed):
"""Test that kPCA with linear kernel is equivalent to PCA for all solvers.
KernelPCA with linear kernel should produce the same output as PCA.
"""
rng = np.random.RandomState(global_random_seed)
X_fit = rng.random_sample((5, n_features))
X_pred = rng.random_sample((2, n_features))
# for a linear kernel, kernel PCA should find the same projection as PCA
# modulo the sign (direction)
# fit only the first four components: fifth is near zero eigenvalue, so
# can be trimmed due to roundoff error
n_comps = 3 if solver == "arpack" else 4
assert_array_almost_equal(
np.abs(KernelPCA(n_comps, eigen_solver=solver).fit(X_fit).transform(X_pred)),
np.abs(
PCA(n_comps, svd_solver=solver if solver != "dense" else "full")
.fit(X_fit)
.transform(X_pred)
),
)
def test_kernel_pca_n_components():
    """Test that `n_components` is correctly taken into account for projections

    For all solvers this tests that the output has the correct shape depending
    on the selected number of components.
    """
    rng = np.random.RandomState(0)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))
    for eigen_solver in ("dense", "arpack", "randomized"):
        for n_components in [1, 2, 4]:
            kpca = KernelPCA(n_components=n_components, eigen_solver=eigen_solver)
            projected = kpca.fit(X_fit).transform(X_pred)
            # One row per prediction sample, one column per component.
            assert projected.shape == (2, n_components)
def test_remove_zero_eig():
    """Check that the ``remove_zero_eig`` parameter works correctly.

    Tests that the null-space (Zero) eigenvalues are removed when
    remove_zero_eig=True, whereas they are not by default.
    """
    # Nearly identical rows => the centered kernel is numerically rank zero.
    X = np.array([[1 - 1e-30, 1], [1, 1], [1, 1 - 1e-20]])
    # n_components=None (default) => remove_zero_eig is True, so every
    # (numerically) zero component is dropped.
    assert KernelPCA().fit_transform(X).shape == (3, 0)
    # An explicit n_components keeps the requested number of components.
    assert KernelPCA(n_components=2).fit_transform(X).shape == (3, 2)
    # remove_zero_eig=True takes precedence over the explicit n_components.
    kpca = KernelPCA(n_components=2, remove_zero_eig=True)
    assert kpca.fit_transform(X).shape == (3, 0)
def test_leave_zero_eig():
    """Non-regression test for issue #12141 (PR #12143)
    This test checks that fit().transform() returns the same result as
    fit_transform() in case of non-removed zero eigenvalue.
    """
    X_fit = np.array([[1, 1], [0, 0]])
    # Assert that even with all np warnings on, there is no div by zero warning
    with warnings.catch_warnings():
        # There might be warnings about the kernel being badly conditioned,
        # but there should not be warnings about division by zero.
        # (Numpy division by zero warning can have many message variants, but
        # at least we know that it is a RuntimeWarning so lets check only this)
        warnings.simplefilter("error", RuntimeWarning)
        with np.errstate(all="warn"):
            k = KernelPCA(n_components=2, remove_zero_eig=False, eigen_solver="dense")
            # Fit, then transform
            A = k.fit(X_fit).transform(X_fit)
            # Do both at once
            B = k.fit_transform(X_fit)
            # Compare (up to the usual sign indeterminacy)
            assert_array_almost_equal(np.abs(A), np.abs(B))
def test_kernel_pca_precomputed(global_random_seed):
    """Test that kPCA works with a precomputed kernel, for all solvers"""
    rng = np.random.RandomState(global_random_seed)
    X_fit = rng.random_sample((5, 4))
    X_pred = rng.random_sample((2, 4))
    for eigen_solver in ("dense", "arpack", "randomized"):
        # Reference: fit on raw features with the default linear kernel.
        X_kpca = (
            KernelPCA(4, eigen_solver=eigen_solver, random_state=0)
            .fit(X_fit)
            .transform(X_pred)
        )
        # Same computation with the Gram matrices supplied explicitly.
        X_kpca2 = (
            KernelPCA(
                4, eigen_solver=eigen_solver, kernel="precomputed", random_state=0
            )
            .fit(np.dot(X_fit, X_fit.T))
            .transform(np.dot(X_pred, X_fit.T))
        )
        X_kpca_train = KernelPCA(
            4, eigen_solver=eigen_solver, kernel="precomputed", random_state=0
        ).fit_transform(np.dot(X_fit, X_fit.T))
        X_kpca_train2 = (
            KernelPCA(
                4, eigen_solver=eigen_solver, kernel="precomputed", random_state=0
            )
            .fit(np.dot(X_fit, X_fit.T))
            .transform(np.dot(X_fit, X_fit.T))
        )
        # abs() because eigenvector signs are arbitrary
        assert_array_almost_equal(np.abs(X_kpca), np.abs(X_kpca2))
        assert_array_almost_equal(np.abs(X_kpca_train), np.abs(X_kpca_train2))
@pytest.mark.parametrize("solver", ["auto", "dense", "arpack", "randomized"])
def test_kernel_pca_precomputed_non_symmetric(solver):
"""Check that the kernel centerer works.
Tests that a non symmetric precomputed kernel is actually accepted
because the kernel centerer does its job correctly.
"""
# a non symmetric gram matrix
K = [[1, 2], [3, 40]]
kpca = KernelPCA(
kernel="precomputed", eigen_solver=solver, n_components=1, random_state=0
)
kpca.fit(K) # no error
# same test with centered kernel
Kc = [[9, -9], [-9, 9]]
kpca_c = KernelPCA(
kernel="precomputed", eigen_solver=solver, n_components=1, random_state=0
)
kpca_c.fit(Kc)
# comparison between the non-centered and centered versions
assert_array_equal(kpca.eigenvectors_, kpca_c.eigenvectors_)
assert_array_equal(kpca.eigenvalues_, kpca_c.eigenvalues_)
def test_gridsearch_pipeline():
    """Check that kPCA works as expected in a grid search pipeline
    Test if we can do a grid-search to find parameters to separate
    circles with a perceptron model.
    """
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)
    kpca = KernelPCA(kernel="rbf", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron(max_iter=5))])
    # Search gamma over {0.25, 0.5, 1.0, 2.0}.
    param_grid = dict(kernel_pca__gamma=2.0 ** np.arange(-2, 2))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    grid_search.fit(X, y)
    # At least one gamma must make the circles perfectly separable.
    assert grid_search.best_score_ == 1
def test_gridsearch_pipeline_precomputed():
    """Check that kPCA works as expected in a grid search pipeline (2)
    Test if we can do a grid-search to find parameters to separate
    circles with a perceptron model. This test uses a precomputed kernel.
    """
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)
    kpca = KernelPCA(kernel="precomputed", n_components=2)
    pipeline = Pipeline([("kernel_pca", kpca), ("Perceptron", Perceptron(max_iter=5))])
    # Grid over the downstream classifier only; the kernel is fixed.
    param_grid = dict(Perceptron__max_iter=np.arange(1, 5))
    grid_search = GridSearchCV(pipeline, cv=3, param_grid=param_grid)
    # The precomputed Gram matrix is what the pipeline receives as "X".
    X_kernel = rbf_kernel(X, gamma=2.0)
    grid_search.fit(X_kernel, y)
    assert grid_search.best_score_ == 1
def test_nested_circles():
    """Check that kPCA projects in a space where nested circles are separable.

    Raw 2D nested circles are not linearly separable, but they become so in
    the first two components of an RBF kernel PCA.
    """
    X, y = make_circles(n_samples=400, factor=0.3, noise=0.05, random_state=0)

    # A perceptron cannot separate the raw circles.
    raw_score = Perceptron(max_iter=5).fit(X, y).score(X, y)
    assert raw_score < 0.8

    # Project onto the first 2 components of an RBF kernel PCA.
    # Note that the gamma value is data dependent. If this test breaks
    # and the gamma value has to be updated, the Kernel PCA example will
    # have to be updated too.
    kpca = KernelPCA(
        kernel="rbf", n_components=2, fit_inverse_transform=True, gamma=2.0
    )
    X_kpca = kpca.fit_transform(X)

    # The projected data is perfectly linearly separable.
    kpca_score = Perceptron(max_iter=5).fit(X_kpca, y).score(X_kpca, y)
    assert kpca_score == 1.0
def test_kernel_conditioning():
    """Check that ``_check_psd_eigenvalues`` is correctly called in kPCA.

    Non-regression test for issue #12140 (PR #12145).
    """
    # A pathological X that yields a tiny, numerically-noisy eigenvalue.
    X = [[5, 1], [5 + 1e-8, 1e-8], [5 + 1e-8, 0]]
    kpca = KernelPCA(kernel="linear", n_components=2, fit_inverse_transform=True)
    kpca.fit(X)

    # The small spurious eigenvalue must have been clipped to exactly zero,
    # i.e. the stored spectrum is already a fixed point of the check.
    assert kpca.eigenvalues_.min() == 0
    assert np.all(kpca.eigenvalues_ == _check_psd_eigenvalues(kpca.eigenvalues_))
@pytest.mark.parametrize("solver", ["auto", "dense", "arpack", "randomized"])
def test_precomputed_kernel_not_psd(solver):
    """Check how KernelPCA works with non-PSD kernels depending on n_components

    Tests for all methods what happens with a non PSD gram matrix (this
    can happen in an isomap scenario, or with custom kernel functions, or
    maybe with ill-posed datasets).

    When ``n_component`` is large enough to capture a negative eigenvalue, an
    error should be raised. Otherwise, KernelPCA should run without error
    since the negative eigenvalues are not selected.
    """
    # a non PSD kernel with large eigenvalues, already centered
    # it was captured from an isomap call and multiplied by 100 for compacity
    K = [
        [4.48, -1.0, 8.07, 2.33, 2.33, 2.33, -5.76, -12.78],
        [-1.0, -6.48, 4.5, -1.24, -1.24, -1.24, -0.81, 7.49],
        [8.07, 4.5, 15.48, 2.09, 2.09, 2.09, -11.1, -23.23],
        [2.33, -1.24, 2.09, 4.0, -3.65, -3.65, 1.02, -0.9],
        [2.33, -1.24, 2.09, -3.65, 4.0, -3.65, 1.02, -0.9],
        [2.33, -1.24, 2.09, -3.65, -3.65, 4.0, 1.02, -0.9],
        [-5.76, -0.81, -11.1, 1.02, 1.02, 1.02, 4.86, 9.75],
        [-12.78, 7.49, -23.23, -0.9, -0.9, -0.9, 9.75, 21.46],
    ]
    # this gram matrix has 5 positive eigenvalues and 3 negative ones
    # [ 52.72, 7.65, 7.65, 5.02, 0. , -0. , -6.13, -15.11]

    # 1. ask for enough components to get a significant negative one
    kpca = KernelPCA(kernel="precomputed", eigen_solver=solver, n_components=7)
    # make sure that the appropriate error is raised
    with pytest.raises(ValueError, match="There are significant negative eigenvalues"):
        kpca.fit(K)

    # 2. ask for a small enough n_components to get only positive ones
    kpca = KernelPCA(kernel="precomputed", eigen_solver=solver, n_components=2)
    if solver == "randomized":
        # the randomized method is still inconsistent with the others on this
        # since it selects the eigenvalues based on the largest 2 modules, not
        # on the largest 2 values.
        #
        # At least we can ensure that we return an error instead of returning
        # the wrong eigenvalues
        with pytest.raises(
            ValueError, match="There are significant negative eigenvalues"
        ):
            kpca.fit(K)
    else:
        # general case: make sure that it works
        kpca.fit(K)
@pytest.mark.parametrize("n_components", [4, 10, 20])
def test_kernel_pca_solvers_equivalence(n_components):
    """Check that 'dense' 'arpack' & 'randomized' solvers give similar results."""
    # Generate random data
    n_train, n_test = 1_000, 100
    X, _ = make_circles(
        n_samples=(n_train + n_test), factor=0.3, noise=0.05, random_state=0
    )
    X_fit, X_pred = X[:n_train, :], X[n_train:, :]

    def project(solver):
        # Fit on the training split, project the held-out split.
        model = KernelPCA(n_components, eigen_solver=solver, random_state=0)
        return model.fit(X_fit).transform(X_pred)

    # 'dense' is the exact reference; the approximate solvers must agree
    # with it up to sign flips of the components (hence the abs()).
    ref_pred = project("dense")
    assert_array_almost_equal(np.abs(project("arpack")), np.abs(ref_pred))
    assert_array_almost_equal(np.abs(project("randomized")), np.abs(ref_pred))
def test_kernel_pca_inverse_transform_reconstruction():
    """Test if the reconstruction is a good approximation.

    A perfect reconstruction is impossible in general because kernel
    centering does not preserve all the information of the original data.
    """
    X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)
    kpca = KernelPCA(
        n_components=20, kernel="rbf", fit_inverse_transform=True, alpha=1e-3
    )
    X_reconstructed = kpca.inverse_transform(kpca.fit_transform(X))
    # Relative reconstruction error below 10%.
    assert np.linalg.norm(X - X_reconstructed) / np.linalg.norm(X) < 1e-1
def test_kernel_pca_raise_not_fitted_error():
    """`inverse_transform` must raise NotFittedError after a plain fit
    (presumably because `fit_inverse_transform` defaults to off — the
    forward fit alone does not enable the inverse mapping)."""
    X = np.random.randn(15).reshape(5, 3)
    model = KernelPCA()
    model.fit(X)
    with pytest.raises(NotFittedError):
        model.inverse_transform(X)
def test_32_64_decomposition_shape():
    """Test that the decomposition is similar for 32 and 64 bits data.

    Non regression test for
    https://github.com/scikit-learn/scikit-learn/issues/18146
    """
    X, y = make_blobs(
        n_samples=30, centers=[[0, 0, 0], [1, 1, 1]], random_state=0, cluster_std=0.1
    )
    X = StandardScaler().fit_transform(X)
    X -= X.min()

    # The shapes correspond to the number of non-zero eigenvalues and must
    # not depend on the floating-point precision of the input.
    kpca = KernelPCA()
    shape_64 = kpca.fit_transform(X).shape
    shape_32 = kpca.fit_transform(X.astype(np.float32)).shape
    assert shape_64 == shape_32
def test_kernel_pca_feature_names_out():
    """Check feature names out for KernelPCA."""
    X, *_ = make_blobs(n_samples=100, n_features=4, random_state=0)
    model = KernelPCA(n_components=2).fit(X)
    expected = [f"kernelpca{i}" for i in range(2)]
    assert_array_equal(expected, model.get_feature_names_out())
def test_kernel_pca_inverse_correct_gamma(global_random_seed):
    """Check that gamma is set correctly when not provided.

    Non-regression test for #26280
    """
    rng = np.random.RandomState(global_random_seed)
    X = rng.random_sample((5, 4))
    kwargs = {
        "n_components": 2,
        "random_state": rng,
        "fit_inverse_transform": True,
        "kernel": "rbf",
    }
    # With gamma=None, gamma_ must resolve to 1 / n_features.
    expected_gamma = 1 / X.shape[1]
    kpca1 = KernelPCA(gamma=None, **kwargs).fit(X)
    kpca2 = KernelPCA(gamma=expected_gamma, **kwargs).fit(X)
    assert kpca1.gamma_ == expected_gamma
    assert kpca2.gamma_ == expected_gamma
    # NOTE(review): both reconstructions reuse kpca1's embedding, so only the
    # inverse_transform path differs between the two models — presumably
    # deliberate to isolate the inverse mapping; confirm it should not be
    # kpca2.transform(X) in the second line.
    X1_recon = kpca1.inverse_transform(kpca1.transform(X))
    X2_recon = kpca2.inverse_transform(kpca1.transform(X))
    assert_allclose(X1_recon, X2_recon)
def test_kernel_pca_pandas_output():
    """Check that KernelPCA works with pandas output when the solver is arpack.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27579
    """
    pytest.importorskip("pandas")
    X, _ = load_iris(as_frame=True, return_X_y=True)
    with sklearn.config_context(transform_output="pandas"):
        # Must not raise.
        KernelPCA(n_components=2, eigen_solver="arpack").fit_transform(X)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,482 @@
import sys
from io import StringIO
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from scipy.linalg import block_diag
from scipy.special import psi
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.decomposition._online_lda_fast import (
_dirichlet_expectation_1d,
_dirichlet_expectation_2d,
)
from sklearn.exceptions import NotFittedError
from sklearn.utils._testing import (
assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
if_safe_multiprocessing_with_blas,
)
from sklearn.utils.fixes import CSR_CONTAINERS
def _build_sparse_array(csr_container):
# Create 3 topics and each topic has 3 distinct words.
# (Each word only belongs to a single topic.)
n_components = 3
block = np.full((3, 3), n_components, dtype=int)
blocks = [block] * n_components
X = block_diag(*blocks)
X = csr_container(X)
return (n_components, X)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_default_prior_params(csr_container):
    """The default priors must equal ``1 / n_components``: fitting with the
    priors spelled out must match fitting with the defaults."""
    n_components, X = _build_sparse_array(csr_container)
    uniform_prior = 1.0 / n_components

    lda_explicit = LatentDirichletAllocation(
        n_components=n_components,
        doc_topic_prior=uniform_prior,
        topic_word_prior=uniform_prior,
        random_state=0,
    )
    lda_default = LatentDirichletAllocation(n_components=n_components, random_state=0)
    assert_almost_equal(lda_explicit.fit_transform(X), lda_default.fit_transform(X))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_fit_batch(csr_container):
    """Batch `fit` must recover the three disjoint word groups as topics."""
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        evaluate_every=1,
        learning_method="batch",
        random_state=rng,
    )
    lda.fit(X)

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        # The three heaviest words of each topic must form one known group.
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_fit_online(csr_container):
    """Online `fit` must recover the three disjoint word groups as topics."""
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        learning_offset=10.0,
        evaluate_every=1,
        learning_method="online",
        random_state=rng,
    )
    lda.fit(X)

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        # The three heaviest words of each topic must form one known group.
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_partial_fit(csr_container):
    """Repeated `partial_fit` calls must recover the three word groups
    (same expectation as the batch `fit` test)."""
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        learning_offset=10.0,
        total_samples=100,
        random_state=rng,
    )
    for _ in range(3):
        lda.partial_fit(X)

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_dense_input(csr_container):
    """Fitting on the densified matrix must recover the three word groups."""
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components, learning_method="batch", random_state=rng
    )
    lda.fit(X.toarray())

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        # The three heaviest words of each topic must form one known group.
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
def test_lda_transform():
    """`transform` output must be non-negative with rows summing to one by
    default; `normalize=False` returns the unnormalized distribution."""
    rng = np.random.RandomState(0)
    X = rng.randint(5, size=(20, 10))
    lda = LatentDirichletAllocation(n_components=3, random_state=rng)

    X_trans = lda.fit_transform(X)
    assert (X_trans > 0.0).any()
    assert_array_almost_equal(X_trans.sum(axis=1), np.ones(X_trans.shape[0]))

    unnormalized = lda.transform(X, normalize=False)
    row_sums = unnormalized.sum(axis=1)[:, np.newaxis]
    assert_array_almost_equal(X_trans, unnormalized / row_sums)
@pytest.mark.parametrize("method", ("online", "batch"))
def test_lda_fit_transform(method):
    """`fit_transform` and a subsequent `transform` must agree."""
    rng = np.random.RandomState(0)
    X = rng.randint(10, size=(50, 20))
    lda = LatentDirichletAllocation(
        n_components=5, learning_method=method, random_state=rng
    )
    from_fit = lda.fit_transform(X)
    from_transform = lda.transform(X)
    assert_array_almost_equal(from_fit, from_transform, 4)
def test_lda_negative_input():
    """Fitting on a dense matrix with negative entries must raise."""
    X = np.full((5, 10), -1.0)
    lda = LatentDirichletAllocation()
    with pytest.raises(ValueError, match=r"^Negative values in data passed"):
        lda.fit(X)
def test_lda_no_component_error():
    """Calling `perplexity` before `fit` must raise NotFittedError."""
    rng = np.random.RandomState(0)
    X = rng.randint(4, size=(20, 10))
    lda = LatentDirichletAllocation()
    expected_msg = (
        "This LatentDirichletAllocation instance is not fitted yet. "
        "Call 'fit' with appropriate arguments before using this "
        "estimator."
    )
    with pytest.raises(NotFittedError, match=expected_msg):
        lda.perplexity(X)
@if_safe_multiprocessing_with_blas
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
@pytest.mark.parametrize("method", ("online", "batch"))
def test_lda_multi_jobs(method, csr_container):
    """Training with n_jobs=2 must still recover the three word groups."""
    n_components, X = _build_sparse_array(csr_container)
    rng = np.random.RandomState(0)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        n_jobs=2,
        learning_method=method,
        evaluate_every=1,
        random_state=rng,
    )
    lda.fit(X)

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
@if_safe_multiprocessing_with_blas
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_partial_fit_multi_jobs(csr_container):
    """Online training via `partial_fit` with n_jobs=2 must still recover
    the three word groups."""
    rng = np.random.RandomState(0)
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        n_jobs=2,
        learning_offset=5.0,
        total_samples=30,
        random_state=rng,
    )
    for _ in range(2):
        lda.partial_fit(X)

    expected_groups = [(0, 1, 2), (3, 4, 5), (6, 7, 8)]
    for topic in lda.components_:
        assert tuple(sorted(topic.argsort()[-3:])) in expected_groups
def test_lda_preplexity_mismatch():
    """Dimension mismatches in `_perplexity_precomp_distr` must raise.

    NOTE: the function name keeps its historical "preplexity" typo so the
    public test identifier stays stable.
    """
    rng = np.random.RandomState(0)
    n_components = rng.randint(3, 6)
    n_samples = rng.randint(6, 10)
    # Draw X from the seeded RNG. The original used the global ``np.random``
    # here, which made the fixture non-deterministic and inconsistent with
    # the rest of this file.
    X = rng.randint(4, size=(n_samples, 10))
    lda = LatentDirichletAllocation(
        n_components=n_components,
        learning_offset=5.0,
        total_samples=20,
        random_state=rng,
    )
    lda.fit(X)
    # invalid number of samples in the precomputed distribution
    invalid_n_samples = rng.randint(4, size=(n_samples + 1, n_components))
    with pytest.raises(ValueError, match=r"Number of samples"):
        lda._perplexity_precomp_distr(X, invalid_n_samples)
    # invalid number of topics in the precomputed distribution
    invalid_n_components = rng.randint(4, size=(n_samples, n_components + 1))
    with pytest.raises(ValueError, match=r"Number of topics"):
        lda._perplexity_precomp_distr(X, invalid_n_components)
@pytest.mark.parametrize("method", ("online", "batch"))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_perplexity(method, csr_container):
    """More training iterations must not increase the perplexity."""
    n_components, X = _build_sparse_array(csr_container)
    common = dict(
        n_components=n_components,
        learning_method=method,
        total_samples=100,
        random_state=0,
    )
    lda_short = LatentDirichletAllocation(max_iter=1, **common)
    lda_long = LatentDirichletAllocation(max_iter=10, **common)

    lda_short.fit(X)
    perp_short = lda_short.perplexity(X, sub_sampling=False)
    lda_long.fit(X)
    perp_long = lda_long.perplexity(X, sub_sampling=False)
    assert perp_short >= perp_long

    # Same ordering must hold with sub-sampling enabled.
    perp_short_sub = lda_short.perplexity(X, sub_sampling=True)
    perp_long_sub = lda_long.perplexity(X, sub_sampling=True)
    assert perp_short_sub >= perp_long_sub
@pytest.mark.parametrize("method", ("online", "batch"))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_score(method, csr_container):
    """More training iterations must not decrease the log-likelihood score."""
    n_components, X = _build_sparse_array(csr_container)
    common = dict(
        n_components=n_components,
        learning_method=method,
        total_samples=100,
        random_state=0,
    )
    lda_short = LatentDirichletAllocation(max_iter=1, **common)
    lda_long = LatentDirichletAllocation(max_iter=10, **common)

    lda_short.fit_transform(X)
    score_short = lda_short.score(X)
    lda_long.fit_transform(X)
    score_long = lda_long.score(X)
    assert score_long >= score_short
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_perplexity_input_format(csr_container):
    """Perplexity must be identical for sparse and dense input."""
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=1,
        learning_method="batch",
        total_samples=100,
        random_state=0,
    )
    lda.fit(X)
    assert_almost_equal(lda.perplexity(X), lda.perplexity(X.toarray()))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_score_perplexity(csr_container):
    """perplexity(X) must equal exp(-score(X) / total word count)."""
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components, max_iter=10, random_state=0
    )
    lda.fit(X)
    perplexity_direct = lda.perplexity(X, sub_sampling=False)
    perplexity_from_score = np.exp(-1.0 * (lda.score(X) / np.sum(X.data)))
    assert_almost_equal(perplexity_direct, perplexity_from_score)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_fit_perplexity(csr_container):
    """The perplexity computed during `fit` (stored as ``bound_``) must match
    the value returned by the `perplexity` method on the training data."""
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=1,
        learning_method="batch",
        random_state=0,
        evaluate_every=1,
    )
    lda.fit(X)
    assert_almost_equal(lda.bound_, lda.perplexity(X))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_empty_docs(csr_container):
    """Test LDA on empty documents (all-zero rows), dense and sparse."""
    Z = np.zeros((5, 4))
    for X in [Z, csr_container(Z)]:
        lda = LatentDirichletAllocation(max_iter=750).fit(X)
        # Per-word mass over topics must still sum to one.
        assert_almost_equal(
            lda.components_.sum(axis=0), np.ones(lda.components_.shape[1])
        )
def test_dirichlet_expectation():
    """Test Cython version of Dirichlet expectation calculation."""
    # 1d case: compare against the explicit psi-based formula.
    x = np.logspace(-100, 10, 10000)
    out = np.empty_like(x)
    _dirichlet_expectation_1d(x, 0, out)
    assert_allclose(out, np.exp(psi(x) - psi(np.sum(x))), atol=1e-19)

    # 2d case: the expectation is taken row-wise.
    x = x.reshape(100, 100)
    expected = psi(x) - psi(np.sum(x, axis=1)[:, np.newaxis])
    assert_allclose(_dirichlet_expectation_2d(x), expected, rtol=1e-11, atol=3e-9)
def check_verbosity(
    verbose, evaluate_every, expected_lines, expected_perplexities, csr_container
):
    """Fit a batch LDA with the given verbosity settings and check what it
    prints: total line count and number of lines mentioning "perplexity"."""
    n_components, X = _build_sparse_array(csr_container)
    lda = LatentDirichletAllocation(
        n_components=n_components,
        max_iter=3,
        learning_method="batch",
        verbose=verbose,
        evaluate_every=evaluate_every,
        random_state=0,
    )
    # Capture everything the fit writes to stdout.
    captured = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = captured
    try:
        lda.fit(X)
    finally:
        sys.stdout = saved_stdout

    output = captured.getvalue()
    assert output.count("\n") == expected_lines
    assert output.count("perplexity") == expected_perplexities
@pytest.mark.parametrize(
    "verbose,evaluate_every,expected_lines,expected_perplexities",
    [
        (False, 1, 0, 0),
        (False, 0, 0, 0),
        (True, 0, 3, 0),
        (True, 1, 3, 3),
        (True, 2, 3, 1),
    ],
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_verbosity(
    verbose, evaluate_every, expected_lines, expected_perplexities, csr_container
):
    # Delegate to the shared helper with explicit keyword arguments.
    check_verbosity(
        verbose=verbose,
        evaluate_every=evaluate_every,
        expected_lines=expected_lines,
        expected_perplexities=expected_perplexities,
        csr_container=csr_container,
    )
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_lda_feature_names_out(csr_container):
    """Check feature names out for LatentDirichletAllocation."""
    n_components, X = _build_sparse_array(csr_container)
    model = LatentDirichletAllocation(n_components=n_components).fit(X)
    expected = [f"latentdirichletallocation{i}" for i in range(n_components)]
    assert_array_equal(expected, model.get_feature_names_out())
@pytest.mark.parametrize("learning_method", ("batch", "online"))
def test_lda_dtype_match(learning_method, global_dtype):
    """Fitted attributes must preserve the dtype of the training data."""
    rng = np.random.RandomState(0)
    X = rng.uniform(size=(20, 10)).astype(global_dtype, copy=False)
    model = LatentDirichletAllocation(
        n_components=5, random_state=0, learning_method=learning_method
    )
    model.fit(X)
    assert model.components_.dtype == global_dtype
    assert model.exp_dirichlet_component_.dtype == global_dtype
@pytest.mark.parametrize("learning_method", ("batch", "online"))
def test_lda_numerical_consistency(learning_method, global_random_seed):
    """float32 and float64 fits must agree numerically."""
    rng = np.random.RandomState(global_random_seed)
    X64 = rng.uniform(size=(20, 10))
    X32 = X64.astype(np.float32)

    def fitted(X):
        # Same hyper-parameters and seed for both precisions.
        return LatentDirichletAllocation(
            n_components=5,
            random_state=global_random_seed,
            learning_method=learning_method,
        ).fit(X)

    model_64 = fitted(X64)
    model_32 = fitted(X32)
    assert_allclose(model_32.components_, model_64.components_)
    assert_allclose(model_32.transform(X32), model_64.transform(X64))

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,347 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import pytest
from numpy.testing import assert_array_equal
from sklearn.datasets import make_low_rank_matrix
from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
if_safe_multiprocessing_with_blas,
)
from sklearn.utils.extmath import svd_flip
def generate_toy_data(n_components, n_samples, image_size, random_state=None):
    """Build ``Y = U @ V + noise`` where each atom of V is a small square
    lit up inside an image of shape ``image_size``.

    Returns the noisy data Y together with the ground-truth factors U and V.
    Note: only 3 atom positions/sizes are defined below — assumes
    n_components <= 3 (TODO confirm with callers).
    """
    n_features = image_size[0] * image_size[1]
    rng = check_random_state(random_state)
    U = rng.randn(n_samples, n_components)
    V = rng.randn(n_components, n_features)

    centers = [(3, 3), (6, 7), (8, 1)]
    sz = [1, 2, 1]
    for k in range(n_components):
        img = np.zeros(image_size)
        xmin, xmax = centers[k][0] - sz[k], centers[k][0] + sz[k]
        ymin, ymax = centers[k][1] - sz[k], centers[k][1] + sz[k]
        img[xmin:xmax][:, ymin:ymax] = 1.0
        V[k, :] = img.ravel()

    # Y is defined by : Y = UV + noise
    Y = np.dot(U, V) + 0.1 * rng.randn(n_samples, n_features)
    return Y, U, V
# SparsePCA can be a bit slow. To avoid having test times go up, we
# test different aspects of the code in the same test
def test_correct_shapes():
    """SparsePCA must produce components and codes of the advertised shapes,
    including in the overcomplete (n_components > n_features) regime."""
    rng = np.random.RandomState(0)
    X = rng.randn(12, 10)

    model = SparsePCA(n_components=8, random_state=rng)
    code = model.fit_transform(X)
    assert model.components_.shape == (8, 10)
    assert code.shape == (12, 8)

    # Overcomplete decomposition.
    model = SparsePCA(n_components=13, random_state=rng)
    code = model.fit_transform(X)
    assert model.components_.shape == (13, 10)
    assert code.shape == (12, 13)
def test_fit_transform(global_random_seed):
    """The 'lars' and 'cd' solvers must find the same components."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array

    spca_lars = SparsePCA(
        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
    )
    spca_lars.fit(Y)

    # Coordinate descent must give similar results.
    spca_cd = SparsePCA(
        n_components=3, method="cd", random_state=global_random_seed, alpha=alpha
    )
    spca_cd.fit(Y)
    assert_array_almost_equal(spca_cd.components_, spca_lars.components_)
@if_safe_multiprocessing_with_blas
def test_fit_transform_parallel(global_random_seed):
    """Fitting with n_jobs=2 must match the single-job result."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array

    single_job = SparsePCA(
        n_components=3, method="lars", alpha=alpha, random_state=global_random_seed
    )
    single_job.fit(Y)
    code_single = single_job.transform(Y)

    multi_job = SparsePCA(
        n_components=3,
        n_jobs=2,
        method="lars",
        alpha=alpha,
        random_state=global_random_seed,
    ).fit(Y)
    code_multi = multi_job.transform(Y)

    # Sanity check: the solution is not trivially all-zero.
    assert not np.all(single_job.components_ == 0)
    assert_array_almost_equal(code_single, code_multi)
def test_transform_nan(global_random_seed):
    """A feature that is zero in every sample must not produce NaNs in the
    transformed output."""
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    Y[:, 0] = 0
    model = SparsePCA(n_components=8, random_state=global_random_seed)
    assert not np.any(np.isnan(model.fit_transform(Y)))
def test_fit_transform_tall(global_random_seed):
    """On a tall array, 'lars' `fit_transform` must agree with 'cd'
    `fit` followed by `transform`."""
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng)  # tall array
    code_lars = SparsePCA(
        n_components=3, method="lars", random_state=rng
    ).fit_transform(Y)
    code_cd = SparsePCA(n_components=3, method="cd", random_state=rng).fit(Y).transform(Y)
    assert_array_almost_equal(code_lars, code_cd)
def test_initialization(global_random_seed):
    """With max_iter=0, components_ must equal V_init row-normalized and
    sign-flipped."""
    rng = np.random.RandomState(global_random_seed)
    U_init = rng.randn(5, 3)
    V_init = rng.randn(3, 4)
    model = SparsePCA(
        n_components=3, U_init=U_init, V_init=V_init, max_iter=0, random_state=rng
    )
    model.fit(rng.randn(5, 4))

    # Expected: rows of V_init scaled to unit norm, then sign-fixed.
    expected = V_init / np.linalg.norm(V_init, axis=1, keepdims=True)
    expected = svd_flip(u=expected.T, v=None)[0].T
    assert_allclose(model.components_, expected)
def test_mini_batch_correct_shapes():
    """MiniBatchSparsePCA must produce components and codes of the advertised
    shapes, including in the overcomplete regime."""
    rng = np.random.RandomState(0)
    X = rng.randn(12, 10)

    model = MiniBatchSparsePCA(n_components=8, max_iter=1, random_state=rng)
    code = model.fit_transform(X)
    assert model.components_.shape == (8, 10)
    assert code.shape == (12, 8)

    # Overcomplete decomposition.
    model = MiniBatchSparsePCA(n_components=13, max_iter=1, random_state=rng)
    code = model.fit_transform(X)
    assert model.components_.shape == (13, 10)
    assert code.shape == (12, 13)
def test_scaling_fit_transform(global_random_seed):
    """Codes from `fit_transform` must match those from `transform` on the
    same leading samples."""
    alpha = 1
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    model = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=rng)
    codes_train = model.fit_transform(Y)
    codes_head = model.transform(Y[:10])
    assert_allclose(codes_train[0], codes_head[0])
def test_pca_vs_spca(global_random_seed):
    """With no sparsity penalty, SparsePCA must span the same subspace as PCA
    and produce the same test projections up to sign."""
    rng = np.random.RandomState(global_random_seed)
    Y, _, _ = generate_toy_data(3, 1000, (8, 8), random_state=rng)
    Z, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)

    spca = SparsePCA(alpha=0, ridge_alpha=0, n_components=2, random_state=rng)
    pca = PCA(n_components=2, random_state=rng)
    pca.fit(Y)
    spca.fit(Y)

    # The two component sets must be mutually orthonormal.
    assert_allclose(
        np.abs(spca.components_.dot(pca.components_.T)), np.eye(2), atol=1e-4
    )

    # Fix signs before comparing the projections of unseen data.
    proj_pca = pca.transform(Z)
    proj_spca = spca.transform(Z)
    proj_pca *= np.sign(proj_pca[0, :])
    proj_spca *= np.sign(proj_spca[0, :])
    assert_allclose(proj_pca, proj_spca, atol=1e-4)
@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
@pytest.mark.parametrize("n_components", [None, 3])
def test_spca_n_components_(SPCA, n_components):
    """`n_components_` must default to n_features when n_components is None."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 12, 10
    model = SPCA(n_components=n_components).fit(rng.randn(n_samples, n_features))
    expected = n_features if n_components is None else n_components
    assert model.n_components_ == expected
@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
@pytest.mark.parametrize("method", ("lars", "cd"))
@pytest.mark.parametrize(
    "data_type, expected_type",
    (
        (np.float32, np.float32),
        (np.float64, np.float64),
        (np.int32, np.float64),
        (np.int64, np.float64),
    ),
)
def test_sparse_pca_dtype_match(SPCA, method, data_type, expected_type):
    """Output dtype must follow the input float dtype; integer input is
    promoted to float64."""
    n_samples, n_features, n_components = 12, 10, 3
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features).astype(data_type)
    model = SPCA(n_components=n_components, method=method)
    code = model.fit_transform(X)
    assert code.dtype == expected_type
    assert model.components_.dtype == expected_type
@pytest.mark.parametrize("SPCA", (SparsePCA, MiniBatchSparsePCA))
@pytest.mark.parametrize("method", ("lars", "cd"))
def test_sparse_pca_numerical_consistency(SPCA, method, global_random_seed):
    """Verify numerical consistency between np.float32 and np.float64."""
    n_samples, n_features, n_components = 20, 20, 5
    X = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        effective_rank=n_components,
        random_state=global_random_seed,
    )

    model_32 = SPCA(
        n_components=n_components,
        method=method,
        random_state=global_random_seed,
    )
    code_32 = model_32.fit_transform(X.astype(np.float32))

    model_64 = SPCA(
        n_components=n_components,
        method=method,
        random_state=global_random_seed,
    )
    code_64 = model_64.fit_transform(X.astype(np.float64))

    assert_allclose(code_64, code_32)
    assert_allclose(model_64.components_, model_32.components_)
@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
def test_spca_feature_names_out(SPCA):
    """Check feature names out for *SparsePCA."""
    rng = np.random.RandomState(0)
    model = SPCA(n_components=4).fit(rng.randn(12, 10))
    prefix = SPCA.__name__.lower()
    expected = [f"{prefix}{i}" for i in range(4)]
    assert_array_equal(expected, model.get_feature_names_out())
def test_spca_early_stopping(global_random_seed):
    """Check that `tol` and `max_no_improvement` act as early stopping."""
    rng = np.random.RandomState(global_random_seed)
    X = rng.randn(50, 10)

    # A loose tolerance must stop earlier than a tight one.
    loose_tol = MiniBatchSparsePCA(
        max_iter=100, tol=0.5, random_state=global_random_seed
    ).fit(X)
    tight_tol = MiniBatchSparsePCA(
        max_iter=100, tol=1e-3, random_state=global_random_seed
    ).fit(X)
    assert loose_tol.n_iter_ < tight_tol.n_iter_

    # A small max_no_improvement must stop earlier than a large one.
    strict_patience = MiniBatchSparsePCA(
        max_iter=100, tol=1e-6, max_no_improvement=2, random_state=global_random_seed
    ).fit(X)
    lenient_patience = MiniBatchSparsePCA(
        max_iter=100, tol=1e-6, max_no_improvement=100, random_state=global_random_seed
    ).fit(X)
    assert strict_patience.n_iter_ < lenient_patience.n_iter_
def test_equivalence_components_pca_spca(global_random_seed):
    """Check the equivalence of the components found by PCA and SparsePCA.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/23932
    """
    rng = np.random.RandomState(global_random_seed)
    X = rng.randn(50, 4)

    pca = PCA(
        n_components=2,
        svd_solver="randomized",
        random_state=0,
    ).fit(X)
    # With both penalties at zero, SparsePCA degenerates to PCA.
    spca = SparsePCA(
        n_components=2,
        method="lars",
        ridge_alpha=0,
        alpha=0,
        random_state=0,
    ).fit(X)

    assert_allclose(pca.components_, spca.components_)
def test_sparse_pca_inverse_transform(global_random_seed):
    """Check that `inverse_transform` in `SparsePCA` and `PCA` are similar."""
    rng = np.random.RandomState(global_random_seed)
    X = rng.randn(10, 5)
    n_components = 2

    # Negligible penalties make SparsePCA behave like plain PCA.
    spca = SparsePCA(
        n_components=n_components,
        alpha=1e-12,
        ridge_alpha=1e-12,
        random_state=global_random_seed,
    )
    pca = PCA(n_components=n_components, random_state=global_random_seed)
    codes_spca = spca.fit_transform(X)
    codes_pca = pca.fit_transform(X)
    assert_allclose(
        spca.inverse_transform(codes_spca), pca.inverse_transform(codes_pca)
    )
@pytest.mark.parametrize("SPCA", [SparsePCA, MiniBatchSparsePCA])
def test_transform_inverse_transform_round_trip(SPCA, global_random_seed):
    """With n_components == n_features, the transform / inverse_transform
    round trip must be lossless."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 10, 5
    X = rng.randn(n_samples, n_features)

    model = SPCA(
        n_components=n_features,  # full-rank decomposition
        alpha=1e-12,
        ridge_alpha=1e-12,
        random_state=global_random_seed,
    )
    codes = model.fit_transform(X)
    assert_allclose(model.inverse_transform(codes), X)

# ---------------------------------------------------------------------------
# New file: sklearn/decomposition/tests/test_truncated_svd.py
# ---------------------------------------------------------------------------
"""Test truncated SVD transformer."""
import numpy as np
import pytest
import scipy.sparse as sp
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_allclose, assert_array_less
# Exact (arpack) and approximate (randomized) solvers shared by the
# parametrized tests below.
SVD_SOLVERS = ["arpack", "randomized"]
@pytest.fixture(scope="module")
def X_sparse():
    """Module-wide sparse fixture resembling a small tf-idf matrix."""
    random_state = check_random_state(42)
    matrix = sp.random(60, 55, density=0.2, format="csr", random_state=random_state)
    # Log-dampen the nonzero entries, as tf-idf weighting would.
    matrix.data[:] = 1 + np.log(matrix.data)
    return matrix
@pytest.mark.parametrize("solver", ["randomized"])
@pytest.mark.parametrize("kind", ("dense", "sparse"))
def test_solvers(X_sparse, solver, kind):
    """Approximate solvers should agree with the exact ARPACK reference."""
    X = X_sparse.toarray() if kind == "dense" else X_sparse

    reference = TruncatedSVD(30, algorithm="arpack")
    candidate = TruncatedSVD(30, algorithm=solver, random_state=42, n_oversamples=100)

    # The leading transformed dimensions carry most of the signal and must match.
    ref_scores = reference.fit_transform(X)[:, :6]
    cand_scores = candidate.fit_transform(X)[:, :6]
    assert_allclose(ref_scores, cand_scores, rtol=2e-3)

    comp_ref = np.abs(reference.components_)
    comp_cand = np.abs(candidate.components_)
    # All elements are equal, but some elements are more equal than others.
    assert_allclose(comp_ref[:9], comp_cand[:9], rtol=1e-3)
    assert_allclose(comp_ref[9:], comp_cand[9:], atol=1e-2)
@pytest.mark.parametrize("n_components", (10, 25, 41, 55))
def test_attributes(n_components, X_sparse):
    """Fitted attributes must reflect the requested number of components."""
    tsvd = TruncatedSVD(n_components).fit(X_sparse)
    assert tsvd.n_components == n_components
    assert tsvd.components_.shape == (n_components, X_sparse.shape[1])
@pytest.mark.parametrize(
    "algorithm, n_components",
    [
        ("arpack", 55),
        ("arpack", 56),
        ("randomized", 56),
    ],
)
def test_too_many_components(X_sparse, algorithm, n_components):
    """Asking for more components than the solver supports raises ValueError."""
    estimator = TruncatedSVD(n_components=n_components, algorithm=algorithm)
    with pytest.raises(ValueError):
        estimator.fit(X_sparse)
@pytest.mark.parametrize("fmt", ("array", "csr", "csc", "coo", "lil"))
def test_sparse_formats(fmt, X_sparse):
    """TruncatedSVD should accept dense arrays and all common sparse formats.

    Every parametrized value maps onto a `scipy.sparse` conversion method
    (`toarray`, `tocsr`, `tocsc`, `tocoo`, `tolil`), so a single attribute
    lookup covers the dense case and all sparse cases uniformly.
    """
    n_samples = X_sparse.shape[0]
    # The former `fmt == "dense"` special case was dead code: "dense" is not
    # among the parametrized formats, and "array" already densifies via
    # `toarray`.
    Xfmt = getattr(X_sparse, "to" + fmt)()
    tsvd = TruncatedSVD(n_components=11)
    Xtrans = tsvd.fit_transform(Xfmt)
    assert Xtrans.shape == (n_samples, 11)
    Xtrans = tsvd.transform(Xfmt)
    assert Xtrans.shape == (n_samples, 11)
@pytest.mark.parametrize("algo", SVD_SOLVERS)
def test_inverse_transform(algo, X_sparse):
    """Reconstruction through `inverse_transform` approximates the input."""
    # We need a lot of components for the reconstruction to be "almost
    # equal" in all positions. XXX Test means or sums instead?
    tsvd = TruncatedSVD(n_components=52, random_state=42, algorithm=algo)
    reconstruction = tsvd.inverse_transform(tsvd.fit_transform(X_sparse))
    assert_allclose(reconstruction, X_sparse.toarray(), rtol=1e-1, atol=2e-1)
def test_integers(X_sparse):
    """Integer-typed input should be handled transparently."""
    Xint = X_sparse.astype(np.int64)
    tsvd = TruncatedSVD(n_components=6)
    transformed = tsvd.fit_transform(Xint)
    assert transformed.shape == (X_sparse.shape[0], tsvd.n_components)
@pytest.mark.parametrize("kind", ("dense", "sparse"))
@pytest.mark.parametrize("n_components", [10, 20])
@pytest.mark.parametrize("solver", SVD_SOLVERS)
def test_explained_variance(X_sparse, kind, n_components, solver):
    """Explained-variance ratios are positive, sum below 1, and are exact."""
    X = X_sparse.toarray() if kind == "dense" else X_sparse
    svd = TruncatedSVD(n_components, algorithm=solver)
    X_tr = svd.fit_transform(X)

    ratios = svd.explained_variance_ratio_
    # Every ratio is strictly positive...
    assert_array_less(0.0, ratios)
    # ...and a truncated decomposition cannot explain all of the variance.
    assert_array_less(ratios.sum(), 1.0)

    # The reported ratios should match the ones recomputed from scratch.
    total_variance = np.var(X_sparse.toarray(), axis=0).sum()
    empirical_ratio = np.var(X_tr, axis=0) / total_variance
    assert_allclose(
        ratios,
        empirical_ratio,
    )
@pytest.mark.parametrize("kind", ("dense", "sparse"))
@pytest.mark.parametrize("solver", SVD_SOLVERS)
def test_explained_variance_components_10_20(X_sparse, kind, solver):
    """Leading ratios are stable when more components are requested."""
    X = X_sparse.toarray() if kind == "dense" else X_sparse
    svd_10 = TruncatedSVD(10, algorithm=solver, n_iter=10).fit(X)
    svd_20 = TruncatedSVD(20, algorithm=solver, n_iter=10).fit(X)

    # The first ten ratios should not depend on how many components are kept.
    assert_allclose(
        svd_10.explained_variance_ratio_,
        svd_20.explained_variance_ratio_[:10],
        rtol=5e-3,
    )

    # Keeping more components always explains strictly more variance here.
    total_10 = svd_10.explained_variance_ratio_.sum()
    total_20 = svd_20.explained_variance_ratio_.sum()
    assert total_20 > total_10
@pytest.mark.parametrize("solver", SVD_SOLVERS)
def test_singular_values_consistency(solver, global_random_seed):
    """Singular values must be consistent with the transformed data's norms."""
    rng = np.random.RandomState(global_random_seed)
    X = rng.randn(100, 80)  # 100 samples, 80 features
    tsvd = TruncatedSVD(n_components=2, algorithm=solver, random_state=rng).fit(X)
    X_tr = tsvd.transform(X)

    # Sum of squared singular values equals the squared Frobenius norm...
    assert_allclose(
        np.sum(tsvd.singular_values_**2.0),
        np.linalg.norm(X_tr, "fro") ** 2.0,
        rtol=1e-2,
    )
    # ...and each singular value is the 2-norm of its score vector.
    assert_allclose(
        tsvd.singular_values_, np.sqrt(np.sum(X_tr**2.0, axis=0)), rtol=1e-2
    )
@pytest.mark.parametrize("solver", SVD_SOLVERS)
def test_singular_values_expected(solver, global_random_seed):
    """Refitting on data with known singular values recovers them exactly."""
    rng = np.random.RandomState(global_random_seed)
    X = rng.randn(100, 110)  # 100 samples, 110 features
    tsvd = TruncatedSVD(n_components=3, algorithm=solver, random_state=rng)
    scores = tsvd.fit_transform(X)

    # Normalize the score columns, then rescale two of them to chosen values
    # so the reconstructed matrix has singular values [3.142, 2.718, 1.0].
    scores /= np.sqrt(np.sum(scores**2.0, axis=0))
    scores[:, 0] *= 3.142
    scores[:, 1] *= 2.718

    X_hat = np.dot(scores, tsvd.components_)
    tsvd.fit(X_hat)
    assert_allclose(tsvd.singular_values_, [3.142, 2.718, 1.0], rtol=1e-14)
def test_truncated_svd_eq_pca(X_sparse):
    """On centered data, TruncatedSVD coincides with PCA."""
    X_dense = X_sparse.toarray()
    X_centered = X_dense - X_dense.mean(axis=0)

    shared_params = dict(n_components=10, random_state=42)
    svd = TruncatedSVD(algorithm="arpack", **shared_params)
    pca = PCA(svd_solver="arpack", **shared_params)

    Xt_svd = svd.fit_transform(X_centered)
    Xt_pca = pca.fit_transform(X_centered)
    assert_allclose(Xt_svd, Xt_pca, rtol=1e-9)
    # Centering beforehand means PCA's learned mean is (numerically) zero.
    assert_allclose(pca.mean_, 0, atol=1e-9)
    assert_allclose(svd.components_, pca.components_)
@pytest.mark.parametrize(
    "algorithm, tol", [("randomized", 0.0), ("arpack", 1e-6), ("arpack", 0.0)]
)
@pytest.mark.parametrize("kind", ("dense", "sparse"))
def test_fit_transform(X_sparse, algorithm, tol, kind):
    """`fit_transform(X)` must equal `fit(X).transform(X)`."""
    X = X_sparse.toarray() if kind == "dense" else X_sparse
    svd = TruncatedSVD(
        n_components=5, n_iter=7, random_state=42, algorithm=algorithm, tol=tol
    )
    via_fit_transform = svd.fit_transform(X)
    via_fit_then_transform = svd.fit(X).transform(X)
    assert_allclose(via_fit_transform, via_fit_then_transform)