add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,200 @@
import itertools
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_almost_equal, assert_equal
from sklearn.neighbors._ball_tree import BallTree, BallTree32, BallTree64
from sklearn.utils import check_random_state
from sklearn.utils._testing import _convert_container
from sklearn.utils.validation import check_array
# Shared RNG and a random symmetric PSD matrix for the Mahalanobis metric
# (V = A @ A.T guarantees symmetry and positive semi-definiteness).
rng = np.random.RandomState(10)
V_mahalanobis = rng.rand(3, 3)
V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
# Fixed feature count used by tests that need one.
DIMENSION = 3
# Real-valued metrics mapped to their extra keyword arguments.
METRICS = {
    "euclidean": {},
    "manhattan": {},
    "minkowski": dict(p=3),
    "chebyshev": {},
}
# Metrics defined on integer-valued (discrete) inputs.
DISCRETE_METRICS = ["hamming", "canberra", "braycurtis"]
# Metrics defined on boolean (0/1) inputs.
BOOLEAN_METRICS = [
    "jaccard",
    "dice",
    "rogerstanimoto",
    "russellrao",
    "sokalmichener",
    "sokalsneath",
]
# Both floating-point precisions of the BallTree implementation under test.
BALL_TREE_CLASSES = [
    BallTree64,
    BallTree32,
]
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Reference k-NN computed by exhaustive search.

    Returns the distances and indices of the ``k`` nearest rows of ``X``
    for every row of ``Y``, using a dense pairwise distance matrix.
    """
    from sklearn.metrics import DistanceMetric

    X, Y = check_array(X), check_array(Y)
    full_dist = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
    # Keep only the k closest columns for each query row.
    nearest = np.argsort(full_dist, axis=1)[:, :k]
    row_selector = np.arange(Y.shape[0])[:, None]
    return full_dist[row_selector, nearest], nearest
def test_BallTree_is_BallTree64_subclass():
    # The public ``BallTree`` alias must be the 64-bit implementation.
    assert issubclass(BallTree, BallTree64)
@pytest.mark.parametrize("metric", itertools.chain(BOOLEAN_METRICS, DISCRETE_METRICS))
@pytest.mark.parametrize("array_type", ["list", "array"])
@pytest.mark.parametrize("BallTreeImplementation", BALL_TREE_CLASSES)
def test_ball_tree_query_metrics(metric, array_type, BallTreeImplementation):
    """Tree queries must match brute force for boolean and discrete metrics."""
    rng = check_random_state(0)
    if metric in BOOLEAN_METRICS:
        # 0/1-valued data for boolean metrics.
        X = rng.random_sample((40, 10)).round(0)
        Y = rng.random_sample((10, 10)).round(0)
    elif metric in DISCRETE_METRICS:
        # Small non-negative integers for discrete metrics.
        X = (4 * rng.random_sample((40, 10))).round(0)
        Y = (4 * rng.random_sample((10, 10))).round(0)
    X, Y = (_convert_container(arr, array_type) for arr in (X, Y))

    n_neighbors = 5
    tree = BallTreeImplementation(X, leaf_size=1, metric=metric)
    tree_dist, _ = tree.query(Y, n_neighbors)
    ref_dist, _ = brute_force_neighbors(X, Y, n_neighbors, metric)
    assert_array_almost_equal(tree_dist, ref_dist)
@pytest.mark.parametrize(
    "BallTreeImplementation, decimal_tol", zip(BALL_TREE_CLASSES, [6, 5])
)
def test_query_haversine(BallTreeImplementation, decimal_tol):
    """Haversine queries match brute force, up to dtype-dependent precision."""
    rng = check_random_state(0)
    # Angular coordinates drawn on [0, 2*pi).
    X = 2 * np.pi * rng.random_sample((40, 2))
    tree = BallTreeImplementation(X, leaf_size=1, metric="haversine")

    tree_dist, tree_ind = tree.query(X, k=5)
    ref_dist, ref_ind = brute_force_neighbors(X, X, k=5, metric="haversine")
    # The float32 tree warrants a looser decimal tolerance than float64.
    assert_array_almost_equal(tree_dist, ref_dist, decimal=decimal_tol)
    assert_array_almost_equal(tree_ind, ref_ind)
@pytest.mark.parametrize("BallTreeImplementation", BALL_TREE_CLASSES)
def test_array_object_type(BallTreeImplementation):
    """Check that we do not accept object dtype array."""
    # Ragged rows force an object dtype, which the tree must reject.
    X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object)
    with pytest.raises(ValueError, match="setting an array element with a sequence"):
        BallTreeImplementation(X)
@pytest.mark.parametrize("BallTreeImplementation", BALL_TREE_CLASSES)
def test_bad_pyfunc_metric(BallTreeImplementation):
    """Invalid user-supplied metric callables must raise informative TypeErrors."""

    def wrong_returned_value(x, y):
        # Returns a string instead of a float.
        return "1"

    def one_arg_func(x):
        # Wrong arity: a metric must accept two vectors.
        return 1.0  # pragma: no cover

    X = np.ones((5, 2))
    expected = "Custom distance function must accept two vectors and return a float."
    with pytest.raises(TypeError, match=expected):
        BallTreeImplementation(X, metric=wrong_returned_value)

    expected = "takes 1 positional argument but 2 were given"
    with pytest.raises(TypeError, match=expected):
        BallTreeImplementation(X, metric=one_arg_func)
@pytest.mark.parametrize("metric", itertools.chain(METRICS, BOOLEAN_METRICS))
def test_ball_tree_numerical_consistency(global_random_seed, metric):
    # Results on float64 and float32 versions of a dataset must be
    # numerically close.
    X_64, X_32, Y_64, Y_32 = get_dataset_for_binary_tree(
        random_seed=global_random_seed, features=50
    )
    metric_params = METRICS.get(metric, {})
    bt_64 = BallTree64(X_64, leaf_size=1, metric=metric, **metric_params)
    bt_32 = BallTree32(X_32, leaf_size=1, metric=metric, **metric_params)

    # Test consistency with respect to the `query` method
    k = 5
    dist_64, ind_64 = bt_64.query(Y_64, k=k)
    dist_32, ind_32 = bt_32.query(Y_32, k=k)
    assert_allclose(dist_64, dist_32, rtol=1e-5)
    assert_equal(ind_64, ind_32)
    # Each tree reports distances in its own precision.
    assert dist_64.dtype == np.float64
    assert dist_32.dtype == np.float32

    # Test consistency with respect to the `query_radius` method
    r = 2.38
    ind_64 = bt_64.query_radius(Y_64, r=r)
    ind_32 = bt_32.query_radius(Y_32, r=r)
    for _ind64, _ind32 in zip(ind_64, ind_32):
        assert_equal(_ind64, _ind32)

    # Test consistency with respect to the `query_radius` method
    # with return distances being true
    ind_64, dist_64 = bt_64.query_radius(Y_64, r=r, return_distance=True)
    ind_32, dist_32 = bt_32.query_radius(Y_32, r=r, return_distance=True)
    for _ind64, _ind32, _dist_64, _dist_32 in zip(ind_64, ind_32, dist_64, dist_32):
        assert_equal(_ind64, _ind32)
        assert_allclose(_dist_64, _dist_32, rtol=1e-5)
        assert _dist_64.dtype == np.float64
        assert _dist_32.dtype == np.float32
@pytest.mark.parametrize("metric", itertools.chain(METRICS, BOOLEAN_METRICS))
def test_kernel_density_numerical_consistency(global_random_seed, metric):
    """`kernel_density` must agree between the 32- and 64-bit ball trees."""
    X_64, X_32, Y_64, Y_32 = get_dataset_for_binary_tree(random_seed=global_random_seed)
    metric_params = METRICS.get(metric, {})
    tree64 = BallTree64(X_64, leaf_size=1, metric=metric, **metric_params)
    tree32 = BallTree32(X_32, leaf_size=1, metric=metric, **metric_params)

    density64 = tree64.kernel_density(Y_64, h=0.1, kernel="gaussian", breadth_first=True)
    density32 = tree32.kernel_density(Y_32, h=0.1, kernel="gaussian", breadth_first=True)
    assert_allclose(density64, density32, rtol=1e-5)
    # Densities come back in the tree's own precision.
    assert density64.dtype == np.float64
    assert density32.dtype == np.float32
def test_two_point_correlation_numerical_consistency(global_random_seed):
    """`two_point_correlation` counts must match between 32- and 64-bit trees."""
    X_64, X_32, Y_64, Y_32 = get_dataset_for_binary_tree(random_seed=global_random_seed)
    tree64 = BallTree64(X_64, leaf_size=10)
    tree32 = BallTree32(X_32, leaf_size=10)

    radii = np.linspace(0, 1, 10)
    counts_64 = tree64.two_point_correlation(Y_64, r=radii, dualtree=True)
    counts_32 = tree32.two_point_correlation(Y_32, r=radii, dualtree=True)
    assert_allclose(counts_64, counts_32)
def get_dataset_for_binary_tree(random_seed, features=3):
    """Generate matched float64/float32 train and query sets.

    Returns ``(X_64, X_32, Y_64, Y_32)``: a (100, features) training set and a
    (5, features) query set, each provided at both precisions from the same
    random draws so results computed at 64 and 32 bits are comparable.
    """
    rng = np.random.RandomState(random_seed)
    base_X = rng.rand(100, features)
    base_Y = rng.rand(5, features)
    return (
        base_X.astype(dtype=np.float64, copy=False),
        base_X.astype(dtype=np.float32, copy=False),
        base_Y.astype(dtype=np.float64, copy=False),
        base_Y.astype(dtype=np.float32, copy=False),
    )

View File

@@ -0,0 +1,101 @@
import numpy as np
import pytest
from sklearn.metrics import euclidean_distances
from sklearn.neighbors import KNeighborsTransformer, RadiusNeighborsTransformer
from sklearn.neighbors._base import _is_sorted_by_data
from sklearn.utils._testing import assert_array_equal
def test_transformer_result():
    # Test the number of neighbors returned
    n_neighbors = 5
    n_samples_fit = 20
    n_queries = 18
    n_features = 10
    rng = np.random.RandomState(42)
    X = rng.randn(n_samples_fit, n_features)
    X2 = rng.randn(n_queries, n_features)
    radius = np.percentile(euclidean_distances(X), 10)

    # with n_neighbors: each row stores exactly n_neighbors entries
    # (+1 in "distance" mode), in a CSR graph sorted by distance.
    for mode in ["distance", "connectivity"]:
        add_one = mode == "distance"
        nnt = KNeighborsTransformer(n_neighbors=n_neighbors, mode=mode)
        Xt = nnt.fit_transform(X)
        assert Xt.shape == (n_samples_fit, n_samples_fit)
        assert Xt.data.shape == (n_samples_fit * (n_neighbors + add_one),)
        assert Xt.format == "csr"
        assert _is_sorted_by_data(Xt)

        X2t = nnt.transform(X2)
        assert X2t.shape == (n_queries, n_samples_fit)
        assert X2t.data.shape == (n_queries * (n_neighbors + add_one),)
        assert X2t.format == "csr"
        assert _is_sorted_by_data(X2t)

    # with radius: the per-row entry count is data dependent, so it should
    # NOT equal the fixed n_neighbors-based count above.
    for mode in ["distance", "connectivity"]:
        add_one = mode == "distance"
        nnt = RadiusNeighborsTransformer(radius=radius, mode=mode)
        Xt = nnt.fit_transform(X)
        assert Xt.shape == (n_samples_fit, n_samples_fit)
        # `!=` instead of `not ... ==`: clearer idiom, identical semantics.
        assert Xt.data.shape != (n_samples_fit * (n_neighbors + add_one),)
        assert Xt.format == "csr"
        assert _is_sorted_by_data(Xt)

        X2t = nnt.transform(X2)
        assert X2t.shape == (n_queries, n_samples_fit)
        assert X2t.data.shape != (n_queries * (n_neighbors + add_one),)
        assert X2t.format == "csr"
        assert _is_sorted_by_data(X2t)
def _has_explicit_diagonal(X):
"""Return True if the diagonal is explicitly stored"""
X = X.tocoo()
explicit = X.row[X.row == X.col]
return len(explicit) == X.shape[0]
def test_explicit_diagonal():
    # Test that the diagonal is explicitly stored in the sparse graph
    n_neighbors = 5
    n_samples_fit, n_samples_transform, n_features = 20, 18, 10
    rng = np.random.RandomState(42)
    X = rng.randn(n_samples_fit, n_features)
    X2 = rng.randn(n_samples_transform, n_features)

    nnt = KNeighborsTransformer(n_neighbors=n_neighbors)
    Xt = nnt.fit_transform(X)
    assert _has_explicit_diagonal(Xt)
    # The first stored entry of each row is the sample itself at distance 0.
    assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)

    Xt = nnt.transform(X)
    assert _has_explicit_diagonal(Xt)
    assert np.all(Xt.data.reshape(n_samples_fit, n_neighbors + 1)[:, 0] == 0)

    # Using transform on new data should not always have zero diagonal
    X2t = nnt.transform(X2)
    assert not _has_explicit_diagonal(X2t)
@pytest.mark.parametrize("Klass", [KNeighborsTransformer, RadiusNeighborsTransformer])
def test_graph_feature_names_out(Klass):
    """Check `get_feature_names_out` for transformers defined in `_graph.py`."""
    n_samples_fit, n_features = 20, 10
    rng = np.random.RandomState(42)
    data = rng.randn(n_samples_fit, n_features)

    fitted = Klass().fit(data)
    # One output feature per fitted sample, prefixed by the lowercase class name.
    prefix = Klass.__name__.lower()
    expected = np.array(
        [f"{prefix}{i}" for i in range(fitted.n_samples_fit_)],
        dtype=object,
    )
    assert_array_equal(fitted.get_feature_names_out(), expected)

View File

@@ -0,0 +1,100 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_equal
from sklearn.neighbors._kd_tree import KDTree, KDTree32, KDTree64
from sklearn.neighbors.tests.test_ball_tree import get_dataset_for_binary_tree
from sklearn.utils.parallel import Parallel, delayed
# Fixed feature count used by tests that need one.
DIMENSION = 3
# Metrics supported by the KD-tree, mapped to their extra keyword arguments.
METRICS = {"euclidean": {}, "manhattan": {}, "chebyshev": {}, "minkowski": dict(p=3)}
# Both floating-point precisions of the KDTree implementation under test.
KD_TREE_CLASSES = [
    KDTree64,
    KDTree32,
]
def test_KDTree_is_KDTree64_subclass():
    # The public ``KDTree`` alias must be the 64-bit implementation.
    assert issubclass(KDTree, KDTree64)
@pytest.mark.parametrize("BinarySearchTree", KD_TREE_CLASSES)
def test_array_object_type(BinarySearchTree):
    """Check that we do not accept object dtype array."""
    # Ragged rows force an object dtype, which the tree must reject.
    X = np.array([(1, 2, 3), (2, 5), (5, 5, 1, 2)], dtype=object)
    with pytest.raises(ValueError, match="setting an array element with a sequence"):
        BinarySearchTree(X)
@pytest.mark.parametrize("BinarySearchTree", KD_TREE_CLASSES)
def test_kdtree_picklable_with_joblib(BinarySearchTree):
    """Make sure that KDTree queries work when joblib memmaps.

    Non-regression test for #21685 and #21228."""
    rng = np.random.RandomState(0)
    X = rng.random_sample((10, 3))
    tree = BinarySearchTree(X, leaf_size=2)

    # Call Parallel with max_nbytes=1 to trigger readonly memory mapping that
    # use to raise "ValueError: buffer source array is read-only" in a previous
    # version of the Cython code.
    Parallel(n_jobs=2, max_nbytes=1)(delayed(tree.query)(data) for data in 2 * [X])
@pytest.mark.parametrize("metric", METRICS)
def test_kd_tree_numerical_consistency(global_random_seed, metric):
    # Results on float64 and float32 versions of a dataset must be
    # numerically close.
    X_64, X_32, Y_64, Y_32 = get_dataset_for_binary_tree(
        random_seed=global_random_seed, features=50
    )
    metric_params = METRICS.get(metric, {})
    kd_64 = KDTree64(X_64, leaf_size=2, metric=metric, **metric_params)
    kd_32 = KDTree32(X_32, leaf_size=2, metric=metric, **metric_params)

    # Test consistency with respect to the `query` method
    k = 4
    dist_64, ind_64 = kd_64.query(Y_64, k=k)
    dist_32, ind_32 = kd_32.query(Y_32, k=k)
    assert_allclose(dist_64, dist_32, rtol=1e-5)
    assert_equal(ind_64, ind_32)
    # Each tree reports distances in its own precision.
    assert dist_64.dtype == np.float64
    assert dist_32.dtype == np.float32

    # Test consistency with respect to the `query_radius` method
    r = 2.38
    ind_64 = kd_64.query_radius(Y_64, r=r)
    ind_32 = kd_32.query_radius(Y_32, r=r)
    for _ind64, _ind32 in zip(ind_64, ind_32):
        assert_equal(_ind64, _ind32)

    # Test consistency with respect to the `query_radius` method
    # with return distances being true
    ind_64, dist_64 = kd_64.query_radius(Y_64, r=r, return_distance=True)
    ind_32, dist_32 = kd_32.query_radius(Y_32, r=r, return_distance=True)
    for _ind64, _ind32, _dist_64, _dist_32 in zip(ind_64, ind_32, dist_64, dist_32):
        assert_equal(_ind64, _ind32)
        assert_allclose(_dist_64, _dist_32, rtol=1e-5)
        assert _dist_64.dtype == np.float64
        assert _dist_32.dtype == np.float32
@pytest.mark.parametrize("metric", METRICS)
def test_kernel_density_numerical_consistency(global_random_seed, metric):
    """`kernel_density` must agree between the 32- and 64-bit KD-trees."""
    X_64, X_32, Y_64, Y_32 = get_dataset_for_binary_tree(random_seed=global_random_seed)
    metric_params = METRICS.get(metric, {})
    tree64 = KDTree64(X_64, leaf_size=2, metric=metric, **metric_params)
    tree32 = KDTree32(X_32, leaf_size=2, metric=metric, **metric_params)

    density64 = tree64.kernel_density(Y_64, h=0.1, kernel="gaussian", breadth_first=True)
    density32 = tree32.kernel_density(Y_32, h=0.1, kernel="gaussian", breadth_first=True)
    assert_allclose(density64, density32, rtol=1e-5)
    # Densities come back in the tree's own precision.
    assert density64.dtype == np.float64
    assert density32.dtype == np.float32

View File

@@ -0,0 +1,252 @@
import joblib
import numpy as np
import pytest
from sklearn.datasets import make_blobs
from sklearn.exceptions import NotFittedError
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KDTree, KernelDensity, NearestNeighbors
from sklearn.neighbors._ball_tree import kernel_norm
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils._testing import assert_allclose
# XXX Duplicated in test_neighbors_tree, test_kde
def compute_kernel_slow(Y, X, kernel, h):
    """Brute-force reference kernel density estimate of points Y given data X."""
    # Resolve named bandwidth rules to their numeric value.
    if h == "scott":
        h = X.shape[0] ** (-1 / (X.shape[1] + 4))
    elif h == "silverman":
        h = (X.shape[0] * (X.shape[1] + 2) / 4) ** (-1 / (X.shape[1] + 4))

    # Dense (n_queries, n_samples) Euclidean distance matrix.
    d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
    norm = kernel_norm(h, X.shape[1], kernel) / X.shape[0]

    # Unnormalized per-query sums for each supported kernel.
    kernel_sums = {
        "gaussian": lambda: np.exp(-0.5 * (d * d) / (h * h)).sum(-1),
        "tophat": lambda: (d < h).sum(-1),
        "epanechnikov": lambda: ((1.0 - (d * d) / (h * h)) * (d < h)).sum(-1),
        "exponential": lambda: (np.exp(-d / h)).sum(-1),
        "linear": lambda: ((1 - d / h) * (d < h)).sum(-1),
        "cosine": lambda: (np.cos(0.5 * np.pi * d / h) * (d < h)).sum(-1),
    }
    if kernel not in kernel_sums:
        raise ValueError("kernel not recognized")
    return norm * kernel_sums[kernel]()
def check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true):
    # Fit a KernelDensity on X and compare its densities on Y against the
    # brute-force reference, within the estimator's own tolerance settings.
    kde = KernelDensity(kernel=kernel, bandwidth=bandwidth, atol=atol, rtol=rtol)
    log_dens = kde.fit(X).score_samples(Y)
    assert_allclose(np.exp(log_dens), dens_true, atol=atol, rtol=max(1e-7, rtol))
    # score() is the total log-likelihood, i.e. the log of the product.
    assert_allclose(
        np.exp(kde.score(Y)), np.prod(dens_true), atol=atol, rtol=max(1e-7, rtol)
    )
@pytest.mark.parametrize(
    "kernel", ["gaussian", "tophat", "epanechnikov", "exponential", "linear", "cosine"]
)
@pytest.mark.parametrize("bandwidth", [0.01, 0.1, 1, "scott", "silverman"])
def test_kernel_density(kernel, bandwidth):
    """KernelDensity must match the brute-force reference for every kernel,
    bandwidth and tolerance setting."""
    n_samples, n_features = (100, 3)
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_features)
    dens_true = compute_kernel_slow(Y, X, kernel, bandwidth)
    # NOTE: a former innermost `for breadth_first in (True, False)` loop never
    # passed the flag anywhere and merely re-ran identical assertions; removed.
    for rtol in [0, 1e-5]:
        for atol in [1e-6, 1e-2]:
            check_results(kernel, bandwidth, atol, rtol, X, Y, dens_true)
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    bandwidth = 0.2

    for kernel in ["gaussian", "tophat"]:
        # draw a tophat sample
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert X.shape == samp.shape

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)
        if kernel == "tophat":
            # tophat support is exactly the bandwidth radius.
            assert np.all(dist < bandwidth)
        elif kernel == "gaussian":
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ["epanechnikov", "exponential", "linear", "cosine"]:
        kde = KernelDensity(bandwidth=bandwidth, kernel=kernel).fit(X)
        with pytest.raises(NotImplementedError):
            kde.sample(100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert kde.sample().shape == (1, 1)
@pytest.mark.parametrize("algorithm", ["auto", "ball_tree", "kd_tree"])
@pytest.mark.parametrize(
    "metric", ["euclidean", "minkowski", "manhattan", "chebyshev", "haversine"]
)
def test_kde_algorithm_metric_choice(algorithm, metric):
    """Smoke test every algorithm/metric combination of KernelDensity."""
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)  # 2 features required for haversine dist.
    Y = rng.randn(10, 2)
    kde = KernelDensity(algorithm=algorithm, metric=metric)

    unsupported = algorithm == "kd_tree" and metric not in KDTree.valid_metrics
    if unsupported:
        # KD-trees reject metrics they cannot handle at fit time.
        with pytest.raises(ValueError, match="invalid metric"):
            kde.fit(X)
        return

    kde.fit(X)
    y_dens = kde.score_samples(Y)
    assert y_dens.shape == Y.shape[:1]
def test_kde_score(n_samples=100, n_features=3):
    # NOTE(review): intentionally a stub — scoring behavior still lacks coverage.
    pass
    # FIXME
    # rng = np.random.RandomState(0)
    # X = rng.random_sample((n_samples, n_features))
    # Y = rng.random_sample((n_samples, n_features))
def test_kde_sample_weights_error():
    """Invalid sample weights (wrong shape, negative values) must be rejected."""
    kde = KernelDensity()
    # 2D weights: one weight per sample is required, not one per feature.
    with pytest.raises(ValueError):
        kde.fit(np.random.random((200, 10)), sample_weight=np.random.random((200, 10)))
    # Negative weights are meaningless for a density estimate.
    with pytest.raises(ValueError):
        kde.fit(np.random.random((200, 10)), sample_weight=-np.random.random(200))
def test_kde_pipeline_gridsearch():
    # test that kde plays nice in pipelines and grid-searches
    X, _ = make_blobs(cluster_std=0.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
    # Identity scaler: keeps the pipeline two-step without altering the data.
    pipe1 = make_pipeline(
        StandardScaler(with_mean=False, with_std=False),
        KernelDensity(kernel="gaussian"),
    )
    params = dict(kerneldensity__bandwidth=[0.001, 0.01, 0.1, 1, 10])
    search = GridSearchCV(pipe1, param_grid=params)
    search.fit(X)
    # With cluster_std=0.1, bandwidth 0.1 should win the cross-validation.
    assert search.best_params_["kerneldensity__bandwidth"] == 0.1
def test_kde_sample_weights():
    """Check score/sample equivalences and invariances under sample weighting."""
    n_samples = 400
    size_test = 20
    weights_neutral = np.full(n_samples, 3.0)
    for d in [1, 2, 10]:
        rng = np.random.RandomState(0)
        X = rng.rand(n_samples, d)
        # Integer weights derived from the data itself (values in [1, 11)).
        weights = 1 + (10 * X.sum(axis=1)).astype(np.int8)
        X_repetitions = np.repeat(X, weights, axis=0)
        n_samples_test = size_test // d
        test_points = rng.rand(n_samples_test, d)
        for algorithm in ["auto", "ball_tree", "kd_tree"]:
            for metric in ["euclidean", "minkowski", "manhattan", "chebyshev"]:
                if algorithm != "kd_tree" or metric in KDTree.valid_metrics:
                    kde = KernelDensity(algorithm=algorithm, metric=metric)

                    # Test that adding a constant sample weight has no effect
                    kde.fit(X, sample_weight=weights_neutral)
                    scores_const_weight = kde.score_samples(test_points)
                    sample_const_weight = kde.sample(random_state=1234)
                    kde.fit(X)
                    scores_no_weight = kde.score_samples(test_points)
                    sample_no_weight = kde.sample(random_state=1234)
                    assert_allclose(scores_const_weight, scores_no_weight)
                    assert_allclose(sample_const_weight, sample_no_weight)

                    # Test equivalence between sampling and (integer) weights
                    kde.fit(X, sample_weight=weights)
                    scores_weight = kde.score_samples(test_points)
                    sample_weight = kde.sample(random_state=1234)
                    kde.fit(X_repetitions)
                    scores_ref_sampling = kde.score_samples(test_points)
                    sample_ref_sampling = kde.sample(random_state=1234)
                    assert_allclose(scores_weight, scores_ref_sampling)
                    assert_allclose(sample_weight, sample_ref_sampling)

                    # Test that sample weights has a non-trivial effect
                    diff = np.max(np.abs(scores_no_weight - scores_weight))
                    assert diff > 0.001

                    # Test invariance with respect to arbitrary scaling
                    scale_factor = rng.rand()
                    kde.fit(X, sample_weight=(scale_factor * weights))
                    scores_scaled_weight = kde.score_samples(test_points)
                    assert_allclose(scores_scaled_weight, scores_weight)
@pytest.mark.parametrize("sample_weight", [None, [0.1, 0.2, 0.3]])
def test_pickling(tmpdir, sample_weight):
    """Scores must be unchanged by a joblib dump/load round-trip.

    Non-regression test: sample weights used to be dropped on pickling,
    so the restored tree was missing information.
    """
    train = np.reshape([1.0, 2.0, 3.0], (-1, 1))
    kde = KernelDensity()
    kde.fit(train, sample_weight=sample_weight)

    query = np.reshape([1.1, 2.1], (-1, 1))
    scores_before = kde.score_samples(query)

    dump_path = str(tmpdir.join("dump.pkl"))
    joblib.dump(kde, dump_path)
    restored = joblib.load(dump_path)

    assert_allclose(scores_before, restored.score_samples(query))
@pytest.mark.parametrize("method", ["score_samples", "sample"])
def test_check_is_fitted(method):
    """Prediction methods on an unfitted estimator must raise NotFittedError."""
    rng = np.random.RandomState(0)
    data = rng.randn(10, 2)
    unfitted_kde = KernelDensity()
    with pytest.raises(NotFittedError):
        getattr(unfitted_kde, method)(data)
@pytest.mark.parametrize("bandwidth", ["scott", "silverman", 0.1])
def test_bandwidth(bandwidth):
    """Smoke-test fit/sample/score and check the computed bandwidth_ value."""
    n_samples, n_features = (100, 3)
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    kde = KernelDensity(bandwidth=bandwidth).fit(X)
    samp = kde.sample(100)
    kde_sc = kde.score_samples(X)
    assert X.shape == samp.shape
    assert kde_sc.shape == (n_samples,)

    # Test that the attribute self.bandwidth_ has the expected value
    if bandwidth == "scott":
        # Scott's rule: n ** (-1 / (d + 4))
        h = X.shape[0] ** (-1 / (X.shape[1] + 4))
    elif bandwidth == "silverman":
        # Silverman's rule: (n * (d + 2) / 4) ** (-1 / (d + 4))
        h = (X.shape[0] * (X.shape[1] + 2) / 4) ** (-1 / (X.shape[1] + 4))
    else:
        h = bandwidth
    assert kde.bandwidth_ == pytest.approx(h)

View File

@@ -0,0 +1,394 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import re
from math import sqrt
import numpy as np
import pytest
from sklearn import metrics, neighbors
from sklearn.datasets import load_iris
from sklearn.metrics import roc_auc_score
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_allclose, assert_array_equal
from sklearn.utils.estimator_checks import (
check_outlier_corruption,
parametrize_with_checks,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# load the iris dataset
# and randomly permute it
rng = check_random_state(0)
iris = load_iris()
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
def test_lof(global_dtype):
    # Toy sample (the last two samples are outliers):
    X = np.asarray(
        [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [5, 3], [-4, 2]],
        dtype=global_dtype,
    )

    # Test LocalOutlierFactor:
    clf = neighbors.LocalOutlierFactor(n_neighbors=5)
    score = clf.fit(X).negative_outlier_factor_
    assert_array_equal(clf._fit_X, X)

    # Assert largest outlier score is smaller than smallest inlier score:
    assert np.min(score[:-2]) > np.max(score[-2:])

    # Assert predict() works:
    clf = neighbors.LocalOutlierFactor(contamination=0.25, n_neighbors=5).fit(X)
    # 6 inliers (+1) followed by the 2 planted outliers (-1).
    expected_predictions = 6 * [1] + 2 * [-1]
    assert_array_equal(clf._predict(), expected_predictions)
    assert_array_equal(clf.fit_predict(X), expected_predictions)
def test_lof_performance(global_dtype):
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2).astype(global_dtype, copy=False)
    X_train = X[:100]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2)).astype(
        global_dtype, copy=False
    )
    X_test = np.r_[X[100:], X_outliers]
    y_test = np.array([0] * 20 + [1] * 20)

    # fit the model for novelty detection
    clf = neighbors.LocalOutlierFactor(novelty=True).fit(X_train)

    # predict scores (the lower, the more normal)
    y_pred = -clf.decision_function(X_test)

    # check that roc_auc is good
    assert roc_auc_score(y_test, y_pred) > 0.99
def test_lof_values(global_dtype):
    # toy samples:
    X_train = np.asarray([[1, 1], [1, 2], [2, 1]], dtype=global_dtype)
    clf1 = neighbors.LocalOutlierFactor(
        n_neighbors=2, contamination=0.1, novelty=True
    ).fit(X_train)
    clf2 = neighbors.LocalOutlierFactor(n_neighbors=2, novelty=True).fit(X_train)
    # Hand-computed LOF values for this 3-point configuration.
    s_0 = 2.0 * sqrt(2.0) / (1.0 + sqrt(2.0))
    s_1 = (1.0 + sqrt(2)) * (1.0 / (4.0 * sqrt(2.0)) + 1.0 / (2.0 + 2.0 * sqrt(2)))

    # check predict()
    assert_allclose(-clf1.negative_outlier_factor_, [s_0, s_1, s_1])
    assert_allclose(-clf2.negative_outlier_factor_, [s_0, s_1, s_1])
    # check predict(one sample not in train)
    assert_allclose(-clf1.score_samples([[2.0, 2.0]]), [s_0])
    assert_allclose(-clf2.score_samples([[2.0, 2.0]]), [s_0])
    # check predict(one sample already in train)
    assert_allclose(-clf1.score_samples([[1.0, 1.0]]), [s_1])
    assert_allclose(-clf2.score_samples([[1.0, 1.0]]), [s_1])
def test_lof_precomputed(global_dtype, random_state=42):
    """Tests LOF with a distance matrix."""
    # Note: smaller samples may result in spurious test success
    rng = np.random.RandomState(random_state)
    X = rng.random_sample((10, 4)).astype(global_dtype, copy=False)
    Y = rng.random_sample((3, 4)).astype(global_dtype, copy=False)
    DXX = metrics.pairwise_distances(X, metric="euclidean")
    DYX = metrics.pairwise_distances(Y, X, metric="euclidean")

    # As a feature matrix (n_samples by n_features)
    lof_X = neighbors.LocalOutlierFactor(n_neighbors=3, novelty=True)
    lof_X.fit(X)
    pred_X_X = lof_X._predict()
    pred_X_Y = lof_X.predict(Y)

    # As a dense distance matrix (n_samples by n_samples)
    lof_D = neighbors.LocalOutlierFactor(
        n_neighbors=3, algorithm="brute", metric="precomputed", novelty=True
    )
    lof_D.fit(DXX)
    pred_D_X = lof_D._predict()
    pred_D_Y = lof_D.predict(DYX)

    # Both input representations must yield identical predictions.
    assert_allclose(pred_X_X, pred_D_X)
    assert_allclose(pred_X_Y, pred_D_Y)
def test_n_neighbors_attribute():
    """n_neighbors_ is capped at n_samples - 1, with a warning when capped."""
    X = iris.data
    max_valid = X.shape[0] - 1

    clf = neighbors.LocalOutlierFactor(n_neighbors=500).fit(X)
    assert clf.n_neighbors_ == max_valid

    # A fresh fit must also emit a warning about the capping.
    clf = neighbors.LocalOutlierFactor(n_neighbors=500)
    msg = "n_neighbors will be set to (n_samples - 1)"
    with pytest.warns(UserWarning, match=re.escape(msg)):
        clf.fit(X)
    assert clf.n_neighbors_ == max_valid
def test_score_samples(global_dtype):
    # score_samples must equal decision_function + offset_, and the
    # contamination setting must not change the raw scores.
    X_train = np.asarray([[1, 1], [1, 2], [2, 1]], dtype=global_dtype)
    X_test = np.asarray([[2.0, 2.0]], dtype=global_dtype)
    clf1 = neighbors.LocalOutlierFactor(
        n_neighbors=2, contamination=0.1, novelty=True
    ).fit(X_train)
    clf2 = neighbors.LocalOutlierFactor(n_neighbors=2, novelty=True).fit(X_train)

    clf1_scores = clf1.score_samples(X_test)
    clf1_decisions = clf1.decision_function(X_test)

    clf2_scores = clf2.score_samples(X_test)
    clf2_decisions = clf2.decision_function(X_test)

    assert_allclose(
        clf1_scores,
        clf1_decisions + clf1.offset_,
    )
    assert_allclose(
        clf2_scores,
        clf2_decisions + clf2.offset_,
    )
    assert_allclose(clf1_scores, clf2_scores)
def test_novelty_errors():
    X = iris.data
    # check errors for novelty=False
    clf = neighbors.LocalOutlierFactor()
    clf.fit(X)
    # predict, decision_function and score_samples raise ValueError
    for method in ["predict", "decision_function", "score_samples"]:
        outer_msg = f"'LocalOutlierFactor' has no attribute '{method}'"
        inner_msg = "{} is not available when novelty=False".format(method)
        with pytest.raises(AttributeError, match=outer_msg) as exec_info:
            getattr(clf, method)
        # The explanatory message is chained as the AttributeError's cause.
        assert isinstance(exec_info.value.__cause__, AttributeError)
        assert inner_msg in str(exec_info.value.__cause__)

    # check errors for novelty=True
    clf = neighbors.LocalOutlierFactor(novelty=True)
    outer_msg = "'LocalOutlierFactor' has no attribute 'fit_predict'"
    inner_msg = "fit_predict is not available when novelty=True"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        getattr(clf, "fit_predict")
    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner_msg in str(exec_info.value.__cause__)
def test_novelty_training_scores(global_dtype):
    """negative_outlier_factor_ must be identical for novelty True and False."""
    X = iris.data.astype(global_dtype)

    # Fit once per novelty mode and collect the training scores.
    training_scores = {}
    for novelty in (False, True):
        clf = neighbors.LocalOutlierFactor(novelty=novelty)
        clf.fit(X)
        training_scores[novelty] = clf.negative_outlier_factor_

    assert_allclose(training_scores[False], training_scores[True])
def test_hasattr_prediction():
    """Prediction-method availability depends on the novelty flag."""
    X = [[1, 1], [1, 2], [2, 1]]
    prediction_methods = ("predict", "decision_function", "score_samples")

    # when novelty=True: prediction methods exist, fit_predict does not.
    clf = neighbors.LocalOutlierFactor(novelty=True)
    clf.fit(X)
    for method in prediction_methods:
        assert hasattr(clf, method)
    assert not hasattr(clf, "fit_predict")

    # when novelty=False: fit_predict exists, prediction methods do not.
    clf = neighbors.LocalOutlierFactor(novelty=False)
    clf.fit(X)
    assert hasattr(clf, "fit_predict")
    for method in prediction_methods:
        assert not hasattr(clf, method)
@parametrize_with_checks([neighbors.LocalOutlierFactor(novelty=True)])
def test_novelty_true_common_tests(estimator, check):
    # the common tests are run for the default LOF (novelty=False).
    # here we run these common tests for LOF when novelty=True
    check(estimator)
@pytest.mark.parametrize("expected_outliers", [30, 53])
def test_predicted_outlier_number(expected_outliers):
    # the number of predicted outliers should be equal to the number of
    # expected outliers unless there are ties in the abnormality scores.
    X = iris.data
    n_samples = X.shape[0]
    contamination = float(expected_outliers) / n_samples

    clf = neighbors.LocalOutlierFactor(contamination=contamination)
    y_pred = clf.fit_predict(X)

    num_outliers = np.sum(y_pred != 1)
    if num_outliers != expected_outliers:
        # A mismatch is only acceptable when caused by tied scores.
        y_dec = clf.negative_outlier_factor_
        check_outlier_corruption(num_outliers, expected_outliers, y_dec)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sparse(csr_container):
    """LocalOutlierFactor must accept CSR inputs in both novelty modes."""
    # TODO: compare results on dense and sparse data as proposed in:
    # https://github.com/scikit-learn/scikit-learn/pull/23585#discussion_r968388186
    X = csr_container(iris.data)

    novelty_lof = neighbors.LocalOutlierFactor(novelty=True)
    novelty_lof.fit(X)
    for method in ("predict", "score_samples", "decision_function"):
        getattr(novelty_lof, method)(X)

    outlier_lof = neighbors.LocalOutlierFactor(novelty=False)
    outlier_lof.fit_predict(X)
def test_lof_error_n_neighbors_too_large():
    """Check that we raise a proper error message when n_neighbors == n_samples.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/17207
    """
    X = np.ones((7, 7))

    # Fitting a single sample cannot satisfy n_neighbors < n_samples_fit.
    msg = (
        "Expected n_neighbors < n_samples_fit, but n_neighbors = 1, "
        "n_samples_fit = 1, n_samples = 1"
    )
    with pytest.raises(ValueError, match=msg):
        lof = neighbors.LocalOutlierFactor(n_neighbors=1).fit(X[:1])

    lof = neighbors.LocalOutlierFactor(n_neighbors=2).fit(X[:2])
    assert lof.n_samples_fit_ == 2

    # Querying the training set (X=None) excludes each point itself, so
    # strictly fewer than n_samples_fit neighbors are available.
    msg = (
        "Expected n_neighbors < n_samples_fit, but n_neighbors = 2, "
        "n_samples_fit = 2, n_samples = 2"
    )
    with pytest.raises(ValueError, match=msg):
        lof.kneighbors(None, n_neighbors=2)

    distances, indices = lof.kneighbors(None, n_neighbors=1)
    assert distances.shape == (2, 1)
    assert indices.shape == (2, 1)

    # With explicit query points, up to n_samples_fit neighbors are allowed.
    msg = (
        "Expected n_neighbors <= n_samples_fit, but n_neighbors = 3, "
        "n_samples_fit = 2, n_samples = 7"
    )
    with pytest.raises(ValueError, match=msg):
        lof.kneighbors(X, n_neighbors=3)

    (
        distances,
        indices,
    ) = lof.kneighbors(X, n_neighbors=2)
    assert distances.shape == (7, 2)
    assert indices.shape == (7, 2)
@pytest.mark.parametrize("algorithm", ["auto", "ball_tree", "kd_tree", "brute"])
@pytest.mark.parametrize("novelty", [True, False])
@pytest.mark.parametrize("contamination", [0.5, "auto"])
def test_lof_input_dtype_preservation(global_dtype, algorithm, contamination, novelty):
    """Check that the fitted attributes are stored using the data type of X."""
    X = iris.data.astype(global_dtype, copy=False)

    lof = neighbors.LocalOutlierFactor(
        n_neighbors=5, algorithm=algorithm, contamination=contamination, novelty=novelty
    )
    lof.fit(X)
    assert lof.negative_outlier_factor_.dtype == global_dtype

    # score_samples / decision_function only exist when novelty=True.
    for method in ("score_samples", "decision_function"):
        if hasattr(lof, method):
            scores = getattr(lof, method)(X)
            assert scores.dtype == global_dtype
@pytest.mark.parametrize("algorithm", ["auto", "ball_tree", "kd_tree", "brute"])
@pytest.mark.parametrize("novelty", [True, False])
@pytest.mark.parametrize("contamination", [0.5, "auto"])
def test_lof_dtype_equivalence(algorithm, novelty, contamination):
    """Check the equivalence of the results with 32 and 64 bits input."""
    inliers = iris.data[:50]  # setosa iris are really distinct from others
    outliers = iris.data[-5:]  # virginica will be considered as outliers
    # lower the precision of the input data to check that we have an equivalence when
    # making the computation in 32 and 64 bits.
    X = np.concatenate([inliers, outliers], axis=0).astype(np.float32)
    lof_32 = neighbors.LocalOutlierFactor(
        algorithm=algorithm, novelty=novelty, contamination=contamination
    )
    X_32 = X.astype(np.float32, copy=True)
    lof_32.fit(X_32)
    lof_64 = neighbors.LocalOutlierFactor(
        algorithm=algorithm, novelty=novelty, contamination=contamination
    )
    # X_64 holds the same (float32-precision) values widened to float64, so any
    # discrepancy comes from the computation precision, not from the data.
    X_64 = X.astype(np.float64, copy=True)
    lof_64.fit(X_64)
    assert_allclose(lof_32.negative_outlier_factor_, lof_64.negative_outlier_factor_)
    # Compare every prediction method available in the current mode (novelty
    # exposes score_samples/decision_function/predict; otherwise fit_predict).
    for method in ("score_samples", "decision_function", "predict", "fit_predict"):
        if hasattr(lof_32, method):
            y_pred_32 = getattr(lof_32, method)(X_32)
            y_pred_64 = getattr(lof_64, method)(X_64)
            assert_allclose(y_pred_32, y_pred_64, atol=0.0002)
def test_lof_duplicate_samples():
    """
    Check that LocalOutlierFactor raises a warning when duplicate values
    in the training data cause inaccurate results.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27839
    """
    rng = np.random.default_rng(0)
    # Build a 1-D dataset where 1000 identical values force zero k-distances
    # for n_neighbors=5, the condition that triggers the warning.
    x = rng.permutation(
        np.hstack(
            [
                [0.1] * 1000,  # constant values
                np.linspace(0.1, 0.3, num=3000),
                rng.random(500) * 100,  # the clear outliers
            ]
        )
    )
    X = x.reshape(-1, 1)
    error_msg = (
        "Duplicate values are leading to incorrect results. "
        "Increase the number of neighbors for more accurate results."
    )
    lof = neighbors.LocalOutlierFactor(n_neighbors=5, contamination=0.1)
    # Catch the warning
    with pytest.warns(UserWarning, match=re.escape(error_msg)):
        lof.fit_predict(X)

View File

@@ -0,0 +1,563 @@
"""
Testing for Neighborhood Component Analysis module (sklearn.neighbors.nca)
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import re
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal
from scipy.optimize import check_grad
from sklearn import clone
from sklearn.datasets import load_iris, make_blobs, make_classification
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_random_state
from sklearn.utils.validation import validate_data
rng = check_random_state(0)
# Load and shuffle the iris dataset.
iris = load_iris()
perm = rng.permutation(iris.target.size)
iris_data = iris.data[perm]
iris_target = iris.target[perm]
# Avoid having test data introducing dependencies between tests.
iris_data.flags.writeable = False
iris_target.flags.writeable = False
# Machine epsilon for float64 (smallest representable relative spacing).
EPS = np.finfo(float).eps
def test_simple_example():
    """Test on a simple example.
    Puts four points in the input space where the opposite labels points are
    next to each other. After transform the samples from the same class
    should be next to each other.
    """
    X = np.array([[0, 0], [0, 1], [2, 0], [2, 1]])
    y = np.array([1, 0, 1, 0])
    nca = NeighborhoodComponentsAnalysis(
        n_components=2, init="identity", random_state=42
    )
    X_embedded = nca.fit_transform(X, y)
    # After the transformation, each sample's nearest non-self neighbor must
    # be the other sample of its own class: 0<->2 and 1<->3.
    nearest_neighbor = pairwise_distances(X_embedded).argsort()[:, 1]
    assert_array_equal(nearest_neighbor, np.array([2, 3, 0, 1]))
def test_toy_example_collapse_points():
    """Test on a toy example of three points that should collapse
    We build a simple example: two points from the same class and a point from
    a different class in the middle of them. On this simple example, the new
    (transformed) points should all collapse into one single point. Indeed, the
    objective is 2/(1 + exp(d/2)), with d the euclidean distance between the
    two samples from the same class. This is maximized for d=0 (because d>=0),
    with an objective equal to 1 (loss=-1.).
    """
    rng = np.random.RandomState(42)
    input_dim = 5
    two_points = rng.randn(2, input_dim)
    # Place the third (differently-labeled) point exactly between the two.
    X = np.vstack([two_points, two_points.mean(axis=0)[np.newaxis, :]])
    y = [0, 0, 1]
    class LossStorer:
        """Optimizer callback that records the last value of the NCA loss."""
        def __init__(self, X, y):
            self.loss = np.inf  # initialize the loss to very high
            # Initialize a fake NCA and variables needed to compute the loss:
            self.fake_nca = NeighborhoodComponentsAnalysis()
            self.fake_nca.n_iter_ = np.inf
            self.X, y = validate_data(self.fake_nca, X, y, ensure_min_samples=2)
            y = LabelEncoder().fit_transform(y)
            self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
        def callback(self, transformation, n_iter):
            """Stores the last value of the loss function"""
            # sign=-1.0 because the optimizer minimizes the negative objective.
            self.loss, _ = self.fake_nca._loss_grad_lbfgs(
                transformation, self.X, self.same_class_mask, -1.0
            )
    loss_storer = LossStorer(X, y)
    nca = NeighborhoodComponentsAnalysis(random_state=42, callback=loss_storer.callback)
    X_t = nca.fit_transform(X, y)
    # test that points are collapsed into one point
    assert_array_almost_equal(X_t - X_t[0], 0.0)
    # The loss should reach its theoretical minimum of -1.
    assert abs(loss_storer.loss + 1) < 1e-10
def test_finite_differences(global_random_seed):
    """Test gradient of loss function
    Assert that the gradient is almost equal to its finite differences
    approximation.
    """
    # Initialize the transformation `M`, as well as `X` and `y` and `NCA`
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(random_state=global_random_seed)
    # M has a random output dimensionality between 1 and n_features.
    M = rng.randn(rng.randint(1, X.shape[1] + 1), X.shape[1])
    nca = NeighborhoodComponentsAnalysis()
    # _loss_grad_lbfgs requires n_iter_ to be set; we only call it directly.
    nca.n_iter_ = 0
    mask = y[:, np.newaxis] == y[np.newaxis, :]
    def fun(M):
        # Loss value only, as a function of the flattened transformation.
        return nca._loss_grad_lbfgs(M, X, mask)[0]
    def grad(M):
        # Analytical gradient, to be compared with finite differences.
        return nca._loss_grad_lbfgs(M, X, mask)[1]
    # compare the gradient to a finite difference approximation
    diff = check_grad(fun, grad, M.ravel())
    assert diff == pytest.approx(0.0, abs=1e-4)
def test_params_validation():
    # Test that invalid parameters raise value error
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]
    NCA = NeighborhoodComponentsAnalysis
    rng = np.random.RandomState(42)
    # An `init` matrix with more output rows (5) than input columns (3) is
    # invalid: the projection cannot increase dimensionality.
    init = rng.rand(5, 3)
    msg = (
        f"The output dimensionality ({init.shape[0]}) "
        "of the given linear transformation `init` cannot be "
        f"greater than its input dimensionality ({init.shape[1]})."
    )
    with pytest.raises(ValueError, match=re.escape(msg)):
        NCA(init=init).fit(X, y)
    # `n_components` larger than the data dimensionality must also fail.
    n_components = 10
    msg = (
        "The preferred dimensionality of the projected space "
        f"`n_components` ({n_components}) cannot be greater "
        f"than the given data dimensionality ({X.shape[1]})!"
    )
    with pytest.raises(ValueError, match=re.escape(msg)):
        NCA(n_components=n_components).fit(X, y)
def test_transformation_dimensions():
    """Check validation of the shape of a user-provided `init` matrix."""
    X = np.arange(12).reshape(4, 3)
    y = [1, 1, 2, 2]
    # Fail if transformation input dimension does not match inputs dimensions
    transformation = np.array([[1, 2], [3, 4]])
    with pytest.raises(ValueError):
        NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
    # Fail if transformation output dimension is larger than
    # transformation input dimension
    transformation = np.array([[1, 2], [3, 4], [5, 6]])
    # len(transformation) > len(transformation[0])
    with pytest.raises(ValueError):
        NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
    # Pass otherwise
    transformation = np.arange(9).reshape(3, 3)
    NeighborhoodComponentsAnalysis(init=transformation).fit(X, y)
def test_n_components():
rng = np.random.RandomState(42)
X = np.arange(12).reshape(4, 3)
y = [1, 1, 2, 2]
init = rng.rand(X.shape[1] - 1, 3)
# n_components = X.shape[1] != transformation.shape[0]
n_components = X.shape[1]
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
msg = (
"The preferred dimensionality of the projected space "
f"`n_components` ({n_components}) does not match the output "
"dimensionality of the given linear transformation "
f"`init` ({init.shape[0]})!"
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X, y)
# n_components > X.shape[1]
n_components = X.shape[1] + 2
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
msg = (
"The preferred dimensionality of the projected space "
f"`n_components` ({n_components}) cannot be greater than "
f"the given data dimensionality ({X.shape[1]})!"
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X, y)
# n_components < X.shape[1]
nca = NeighborhoodComponentsAnalysis(n_components=2, init="identity")
nca.fit(X, y)
def test_init_transformation():
rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
# Start learning from scratch
nca = NeighborhoodComponentsAnalysis(init="identity")
nca.fit(X, y)
# Initialize with random
nca_random = NeighborhoodComponentsAnalysis(init="random")
nca_random.fit(X, y)
# Initialize with auto
nca_auto = NeighborhoodComponentsAnalysis(init="auto")
nca_auto.fit(X, y)
# Initialize with PCA
nca_pca = NeighborhoodComponentsAnalysis(init="pca")
nca_pca.fit(X, y)
# Initialize with LDA
nca_lda = NeighborhoodComponentsAnalysis(init="lda")
nca_lda.fit(X, y)
init = rng.rand(X.shape[1], X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
nca.fit(X, y)
# init.shape[1] must match X.shape[1]
init = rng.rand(X.shape[1], X.shape[1] + 1)
nca = NeighborhoodComponentsAnalysis(init=init)
msg = (
f"The input dimensionality ({init.shape[1]}) of the given "
"linear transformation `init` must match the "
f"dimensionality of the given inputs `X` ({X.shape[1]})."
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X, y)
# init.shape[0] must be <= init.shape[1]
init = rng.rand(X.shape[1] + 1, X.shape[1])
nca = NeighborhoodComponentsAnalysis(init=init)
msg = (
f"The output dimensionality ({init.shape[0]}) of the given "
"linear transformation `init` cannot be "
f"greater than its input dimensionality ({init.shape[1]})."
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X, y)
# init.shape[0] must match n_components
init = rng.rand(X.shape[1], X.shape[1])
n_components = X.shape[1] - 2
nca = NeighborhoodComponentsAnalysis(init=init, n_components=n_components)
msg = (
"The preferred dimensionality of the "
f"projected space `n_components` ({n_components}) "
"does not match the output dimensionality of the given "
f"linear transformation `init` ({init.shape[0]})!"
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X, y)
@pytest.mark.parametrize("n_samples", [3, 5, 7, 11])
@pytest.mark.parametrize("n_features", [3, 5, 7, 11])
@pytest.mark.parametrize("n_classes", [5, 7, 11])
@pytest.mark.parametrize("n_components", [3, 5, 7, 11])
def test_auto_init(n_samples, n_features, n_classes, n_components):
# Test that auto choose the init as expected with every configuration
# of order of n_samples, n_features, n_classes and n_components.
rng = np.random.RandomState(42)
nca_base = NeighborhoodComponentsAnalysis(
init="auto", n_components=n_components, max_iter=1, random_state=rng
)
if n_classes >= n_samples:
pass
# n_classes > n_samples is impossible, and n_classes == n_samples
# throws an error from lda but is an absurd case
else:
X = rng.randn(n_samples, n_features)
y = np.tile(range(n_classes), n_samples // n_classes + 1)[:n_samples]
if n_components > n_features:
# this would return a ValueError, which is already tested in
# test_params_validation
pass
else:
nca = clone(nca_base)
nca.fit(X, y)
if n_components <= min(n_classes - 1, n_features):
nca_other = clone(nca_base).set_params(init="lda")
elif n_components < min(n_features, n_samples):
nca_other = clone(nca_base).set_params(init="pca")
else:
nca_other = clone(nca_base).set_params(init="identity")
nca_other.fit(X, y)
assert_array_almost_equal(nca.components_, nca_other.components_)
def test_warm_start_validation():
X, y = make_classification(
n_samples=30,
n_features=5,
n_classes=4,
n_redundant=0,
n_informative=5,
random_state=0,
)
nca = NeighborhoodComponentsAnalysis(warm_start=True, max_iter=5)
nca.fit(X, y)
X_less_features, y = make_classification(
n_samples=30,
n_features=4,
n_classes=4,
n_redundant=0,
n_informative=4,
random_state=0,
)
msg = (
f"The new inputs dimensionality ({X_less_features.shape[1]}) "
"does not match the input dimensionality of the previously learned "
f"transformation ({nca.components_.shape[1]})."
)
with pytest.raises(ValueError, match=re.escape(msg)):
nca.fit(X_less_features, y)
def test_warm_start_effectiveness():
# A 1-iteration second fit on same data should give almost same result
# with warm starting, and quite different result without warm starting.
nca_warm = NeighborhoodComponentsAnalysis(warm_start=True, random_state=0)
nca_warm.fit(iris_data, iris_target)
transformation_warm = nca_warm.components_
nca_warm.max_iter = 1
nca_warm.fit(iris_data, iris_target)
transformation_warm_plus_one = nca_warm.components_
nca_cold = NeighborhoodComponentsAnalysis(warm_start=False, random_state=0)
nca_cold.fit(iris_data, iris_target)
transformation_cold = nca_cold.components_
nca_cold.max_iter = 1
nca_cold.fit(iris_data, iris_target)
transformation_cold_plus_one = nca_cold.components_
diff_warm = np.sum(np.abs(transformation_warm_plus_one - transformation_warm))
diff_cold = np.sum(np.abs(transformation_cold_plus_one - transformation_cold))
assert diff_warm < 3.0, (
"Transformer changed significantly after one "
"iteration even though it was warm-started."
)
assert diff_cold > diff_warm, (
"Cold-started transformer changed less "
"significantly than warm-started "
"transformer after one iteration."
)
@pytest.mark.parametrize(
"init_name", ["pca", "lda", "identity", "random", "precomputed"]
)
def test_verbose(init_name, capsys):
# assert there is proper output when verbose = 1, for every initialization
# except auto because auto will call one of the others
rng = np.random.RandomState(42)
X, y = make_blobs(n_samples=30, centers=6, n_features=5, random_state=0)
regexp_init = r"... done in \ *\d+\.\d{2}s"
msgs = {
"pca": "Finding principal components" + regexp_init,
"lda": "Finding most discriminative components" + regexp_init,
}
if init_name == "precomputed":
init = rng.randn(X.shape[1], X.shape[1])
else:
init = init_name
nca = NeighborhoodComponentsAnalysis(verbose=1, init=init)
nca.fit(X, y)
out, _ = capsys.readouterr()
# check output
lines = re.split("\n+", out)
# if pca or lda init, an additional line is printed, so we test
# it and remove it to test the rest equally among initializations
if init_name in ["pca", "lda"]:
assert re.match(msgs[init_name], lines[0])
lines = lines[1:]
assert lines[0] == "[NeighborhoodComponentsAnalysis]"
header = "{:>10} {:>20} {:>10}".format("Iteration", "Objective Value", "Time(s)")
assert lines[1] == "[NeighborhoodComponentsAnalysis] {}".format(header)
assert lines[2] == "[NeighborhoodComponentsAnalysis] {}".format("-" * len(header))
for line in lines[3:-2]:
# The following regex will match for instance:
# '[NeighborhoodComponentsAnalysis] 0 6.988936e+01 0.01'
assert re.match(
r"\[NeighborhoodComponentsAnalysis\] *\d+ *\d\.\d{6}e"
r"[+|-]\d+\ *\d+\.\d{2}",
line,
)
assert re.match(
r"\[NeighborhoodComponentsAnalysis\] Training took\ *\d+\.\d{2}s\.",
lines[-2],
)
assert lines[-1] == ""
def test_no_verbose(capsys):
    # assert by default there is no output (verbose=0)
    model = NeighborhoodComponentsAnalysis()
    model.fit(iris_data, iris_target)
    captured, _ = capsys.readouterr()
    assert captured == ""
def test_singleton_class():
X = iris_data.copy()
y = iris_target.copy()
# one singleton class
singleton_class = 1
(ind_singleton,) = np.where(y == singleton_class)
y[ind_singleton] = 2
y[ind_singleton[0]] = singleton_class
nca = NeighborhoodComponentsAnalysis(max_iter=30)
nca.fit(X, y)
# One non-singleton class
(ind_1,) = np.where(y == 1)
(ind_2,) = np.where(y == 2)
y[ind_1] = 0
y[ind_1[0]] = 1
y[ind_2] = 0
y[ind_2[0]] = 2
nca = NeighborhoodComponentsAnalysis(max_iter=30)
nca.fit(X, y)
# Only singleton classes
(ind_0,) = np.where(y == 0)
(ind_1,) = np.where(y == 1)
(ind_2,) = np.where(y == 2)
X = X[[ind_0[0], ind_1[0], ind_2[0]]]
y = y[[ind_0[0], ind_1[0], ind_2[0]]]
nca = NeighborhoodComponentsAnalysis(init="identity", max_iter=30)
nca.fit(X, y)
assert_array_equal(X, nca.transform(X))
def test_one_class():
    # With a single class and an identity init, the transformation should
    # stay the identity, i.e. transform(X) == X.
    single_class_mask = iris_target == 0
    X = iris_data[single_class_mask]
    y = iris_target[single_class_mask]
    nca = NeighborhoodComponentsAnalysis(
        max_iter=30, n_components=X.shape[1], init="identity"
    )
    nca.fit(X, y)
    assert_array_equal(X, nca.transform(X))
def test_callback(capsys):
max_iter = 10
def my_cb(transformation, n_iter):
assert transformation.shape == (iris_data.shape[1] ** 2,)
rem_iter = max_iter - n_iter
print("{} iterations remaining...".format(rem_iter))
# assert that my_cb is called
nca = NeighborhoodComponentsAnalysis(max_iter=max_iter, callback=my_cb, verbose=1)
nca.fit(iris_data, iris_target)
out, _ = capsys.readouterr()
# check output
assert "{} iterations remaining...".format(max_iter - 1) in out
def test_expected_transformation_shape():
"""Test that the transformation has the expected shape."""
X = iris_data
y = iris_target
class TransformationStorer:
def __init__(self, X, y):
# Initialize a fake NCA and variables needed to call the loss
# function:
self.fake_nca = NeighborhoodComponentsAnalysis()
self.fake_nca.n_iter_ = np.inf
self.X, y = validate_data(self.fake_nca, X, y, ensure_min_samples=2)
y = LabelEncoder().fit_transform(y)
self.same_class_mask = y[:, np.newaxis] == y[np.newaxis, :]
def callback(self, transformation, n_iter):
"""Stores the last value of the transformation taken as input by
the optimizer"""
self.transformation = transformation
transformation_storer = TransformationStorer(X, y)
cb = transformation_storer.callback
nca = NeighborhoodComponentsAnalysis(max_iter=5, callback=cb)
nca.fit(X, y)
assert transformation_storer.transformation.size == X.shape[1] ** 2
def test_convergence_warning():
    # With only 2 iterations the optimizer cannot finish, so a
    # ConvergenceWarning carrying the estimator's class name must be raised.
    nca = NeighborhoodComponentsAnalysis(max_iter=2, verbose=1)
    cls_name = nca.__class__.__name__
    msg = "[{}] NCA did not converge".format(cls_name)
    with pytest.warns(ConvergenceWarning, match=re.escape(msg)):
        nca.fit(iris_data, iris_target)
@pytest.mark.parametrize(
"param, value",
[
("n_components", np.int32(3)),
("max_iter", np.int32(100)),
("tol", np.float32(0.0001)),
],
)
def test_parameters_valid_types(param, value):
# check that no error is raised when parameters have numpy integer or
# floating types.
nca = NeighborhoodComponentsAnalysis(**{param: value})
X = iris_data
y = iris_target
nca.fit(X, y)
@pytest.mark.parametrize("n_components", [None, 2])
def test_nca_feature_names_out(n_components):
"""Check `get_feature_names_out` for `NeighborhoodComponentsAnalysis`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/28293
"""
X = iris_data
y = iris_target
est = NeighborhoodComponentsAnalysis(n_components=n_components).fit(X, y)
names_out = est.get_feature_names_out()
class_name_lower = est.__class__.__name__.lower()
if n_components is not None:
expected_n_features = n_components
else:
expected_n_features = X.shape[1]
expected_names_out = np.array(
[f"{class_name_lower}{i}" for i in range(expected_n_features)],
dtype=object,
)
assert_array_equal(names_out, expected_names_out)

View File

@@ -0,0 +1,237 @@
"""
Testing for the nearest centroid module.
"""
import numpy as np
import pytest
from sklearn import datasets
from sklearn.neighbors import NearestCentroid
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# toy sample: two well-separated classes of three 2-D points each
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]
# test points and the values expected from predict / decision_function /
# predict_proba on them
T = [[-1, -1], [2, 2], [3, 2]]
true_result = [-1, 1, 1]
true_result_prior1 = [-1, 1, 1]
true_discriminant_scores = [-32, 64, 80]
true_proba = [[1, 1.26642e-14], [1.60381e-28, 1], [1.80485e-35, 1]]
# also load the iris dataset
# and randomly permute it
iris = datasets.load_iris()
rng = np.random.RandomState(1)
perm = rng.permutation(iris.target.size)
iris.data = iris.data[perm]
iris.target = iris.target[perm]
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_classification_toy(csr_container):
# Check classification on a toy dataset, including sparse versions.
X_csr = csr_container(X)
T_csr = csr_container(T)
# Check classification on a toy dataset, including sparse versions.
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
assert_array_almost_equal(clf.decision_function(T), true_discriminant_scores)
assert_array_almost_equal(clf.predict_proba(T), true_proba)
# Test uniform priors
clf = NearestCentroid(priors="uniform")
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
assert_array_almost_equal(clf.decision_function(T), true_discriminant_scores)
assert_array_almost_equal(clf.predict_proba(T), true_proba)
clf = NearestCentroid(priors="empirical")
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result)
assert_array_almost_equal(clf.decision_function(T), true_discriminant_scores)
assert_array_almost_equal(clf.predict_proba(T), true_proba)
# Test custom priors
clf = NearestCentroid(priors=[0.25, 0.75])
clf.fit(X, y)
assert_array_equal(clf.predict(T), true_result_prior1)
# Same test, but with a sparse matrix to fit and test.
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit with sparse, test with non-sparse
clf = NearestCentroid()
clf.fit(X_csr, y)
assert_array_equal(clf.predict(T), true_result)
# Fit with non-sparse, test with sparse
clf = NearestCentroid()
clf.fit(X, y)
assert_array_equal(clf.predict(T_csr), true_result)
# Fit and predict with non-CSR sparse matrices
clf = NearestCentroid()
clf.fit(X_csr.tocoo(), y)
assert_array_equal(clf.predict(T_csr.tolil()), true_result)
def test_iris():
    # Check consistency on dataset iris.
    for metric in ("euclidean", "manhattan"):
        model = NearestCentroid(metric=metric)
        model.fit(iris.data, iris.target)
        accuracy = np.mean(model.predict(iris.data) == iris.target)
        assert accuracy > 0.9, "Failed with score = " + str(accuracy)
def test_iris_shrinkage():
    # Check consistency on dataset iris, when using shrinkage.
    # None disables shrinkage; the thresholds 0.1/0.5 shrink centroids
    # increasingly, so a lower accuracy bound (0.8) is used than in test_iris.
    for metric in ("euclidean", "manhattan"):
        for shrink_threshold in [None, 0.1, 0.5]:
            clf = NearestCentroid(metric=metric, shrink_threshold=shrink_threshold)
            clf = clf.fit(iris.data, iris.target)
            score = np.mean(clf.predict(iris.data) == iris.target)
            assert score > 0.8, "Failed with score = " + str(score)
def test_pickle():
    """Check that a fitted NearestCentroid survives a pickle round-trip."""
    import pickle
    # classification
    obj = NearestCentroid()
    obj.fit(iris.data, iris.target)
    score = obj.score(iris.data, iris.target)
    obj2 = pickle.loads(pickle.dumps(obj))
    # Identity comparison of types is the idiomatic exact-type check
    # (avoids the `type(x) == T` anti-pattern).
    assert type(obj2) is obj.__class__
    score2 = obj2.score(iris.data, iris.target)
    # The unpickled estimator must score identically on the same data.
    assert_array_equal(
        score,
        score2,
        "Failed to generate same score after pickling (classification).",
    )
def test_shrinkage_correct():
# Ensure that the shrinking is correct.
# The expected result is calculated by R (pamr),
# which is implemented by the author of the original paper.
# (One need to modify the code to output the new centroid in pamr.predict)
X = np.array([[0, 1], [1, 0], [1, 1], [2, 0], [6, 8]])
y = np.array([1, 1, 2, 2, 2])
clf = NearestCentroid(shrink_threshold=0.1)
clf.fit(X, y)
expected_result = np.array([[0.7787310, 0.8545292], [2.814179, 2.763647]])
np.testing.assert_array_almost_equal(clf.centroids_, expected_result)
def test_shrinkage_threshold_decoded_y():
    """Shrinkage must not depend on how the class labels are encoded.

    Fitting with labels {0, 1} and with labels {-1, 1} on the same data must
    produce identical centroids.
    """
    clf = NearestCentroid(shrink_threshold=0.01)
    # Re-encode the -1 class as 0 (np.asarray copies since y is a list).
    y_ind = np.asarray(y)
    y_ind[y_ind == -1] = 0
    clf.fit(X, y_ind)
    centroid_encoded = clf.centroids_
    clf.fit(X, y)
    assert_array_equal(centroid_encoded, clf.centroids_)
def test_predict_translated_data():
# Test that NearestCentroid gives same results on translated data
rng = np.random.RandomState(0)
X = rng.rand(50, 50)
y = rng.randint(0, 3, 50)
noise = rng.rand(50)
clf = NearestCentroid(shrink_threshold=0.1)
clf.fit(X, y)
y_init = clf.predict(X)
clf = NearestCentroid(shrink_threshold=0.1)
X_noise = X + noise
clf.fit(X_noise, y)
y_translate = clf.predict(X_noise)
assert_array_equal(y_init, y_translate)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_manhattan_metric(csr_container):
    # Test the manhattan metric.
    X_csr = csr_container(X)
    clf = NearestCentroid(metric="manhattan")
    clf.fit(X, y)
    dense_centroid = clf.centroids_
    # Dense and sparse fits must agree, and both must match the expected
    # centroids for the toy data.
    clf.fit(X_csr, y)
    assert_array_equal(clf.centroids_, dense_centroid)
    assert_array_equal(dense_centroid, [[-1, -1], [1, 1]])
def test_features_zero_var():
    # Test that features with 0 variance throw error
    # All samples are identical, so every feature has zero variance, which is
    # invalid when shrinkage is requested.
    X = np.empty((10, 2))
    X[:, 0] = -0.13725701
    X[:, 1] = -0.9853293
    y = np.zeros((10))
    y[0] = 1
    clf = NearestCentroid(shrink_threshold=0.1)
    with pytest.raises(ValueError):
        clf.fit(X, y)
def test_negative_priors_error():
    """Check that we raise an error when the user-defined priors are negative."""
    model = NearestCentroid(priors=[-2, 4])
    with pytest.raises(ValueError, match="priors must be non-negative"):
        model.fit(X, y)
def test_warn_non_normalized_priors():
    """Check that we raise a warning and normalize the user-defined priors when they
    don't sum to 1.
    """
    priors = [2, 4]
    clf = NearestCentroid(priors=priors)
    with pytest.warns(
        UserWarning,
        match="The priors do not sum to 1. Normalizing such that it sums to one.",
    ):
        clf.fit(X, y)
    # After fitting, the stored priors are the normalized ones: [1/3, 2/3].
    assert_allclose(clf.class_prior_, np.asarray(priors) / np.asarray(priors).sum())
@pytest.mark.parametrize(
    "response_method", ["decision_function", "predict_proba", "predict_log_proba"]
)
def test_method_not_available_with_manhattan(response_method):
    """Check that we raise an AttributeError with Manhattan metric when trying
    to call a non-thresholded response method.
    """
    clf = NearestCentroid(metric="manhattan").fit(X, y)
    # The probabilistic/score methods are only defined for the Euclidean
    # metric; getattr triggers the AttributeError at attribute access time.
    with pytest.raises(AttributeError):
        getattr(clf, response_method)(T)
@pytest.mark.parametrize("array_constructor", [np.array] + CSR_CONTAINERS)
def test_error_zero_variances(array_constructor):
    """Check that we raise an error when the variance for all features is zero."""
    # Each feature is constant across samples (columns of 1s and 2s).
    X = np.ones((len(y), 2))
    X[:, 1] *= 2
    X = array_constructor(X)
    clf = NearestCentroid()
    with pytest.raises(ValueError, match="All features have zero variance"):
        clf.fit(X, y)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,256 @@
"""
This is testing the equivalence between some estimators with internal nearest
neighbors computations, and the corresponding pipeline versions with
KNeighborsTransformer or RadiusNeighborsTransformer to precompute the
neighbors.
"""
import numpy as np
from sklearn.base import clone
from sklearn.cluster import DBSCAN, SpectralClustering
from sklearn.cluster.tests.common import generate_clustered_data
from sklearn.datasets import make_blobs
from sklearn.manifold import TSNE, Isomap, SpectralEmbedding
from sklearn.neighbors import (
KNeighborsRegressor,
KNeighborsTransformer,
LocalOutlierFactor,
RadiusNeighborsRegressor,
RadiusNeighborsTransformer,
)
from sklearn.pipeline import make_pipeline
from sklearn.utils._testing import assert_array_almost_equal
def test_spectral_clustering():
# Test chaining KNeighborsTransformer and SpectralClustering
n_neighbors = 5
X, _ = make_blobs(random_state=0)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode="connectivity"),
SpectralClustering(
n_neighbors=n_neighbors, affinity="precomputed", random_state=42
),
)
est_compact = SpectralClustering(
n_neighbors=n_neighbors, affinity="nearest_neighbors", random_state=42
)
labels_compact = est_compact.fit_predict(X)
labels_chain = est_chain.fit_predict(X)
assert_array_almost_equal(labels_chain, labels_compact)
def test_spectral_embedding():
# Test chaining KNeighborsTransformer and SpectralEmbedding
n_neighbors = 5
n_samples = 1000
centers = np.array(
[
[0.0, 5.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 4.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 5.0, 1.0],
]
)
S, true_labels = make_blobs(
n_samples=n_samples, centers=centers, cluster_std=1.0, random_state=42
)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode="connectivity"),
SpectralEmbedding(
n_neighbors=n_neighbors, affinity="precomputed", random_state=42
),
)
est_compact = SpectralEmbedding(
n_neighbors=n_neighbors, affinity="nearest_neighbors", random_state=42
)
St_compact = est_compact.fit_transform(S)
St_chain = est_chain.fit_transform(S)
assert_array_almost_equal(St_chain, St_compact)
def test_dbscan():
    # Test chaining RadiusNeighborsTransformer and DBSCAN
    radius = 0.3
    n_clusters = 3
    X = generate_clustered_data(n_clusters=n_clusters)
    # compare the chained version and the compact version
    # The transformer precomputes the radius-neighborhood distance graph with
    # the same radius that DBSCAN uses as eps, so both paths see identical
    # neighborhoods.
    est_chain = make_pipeline(
        RadiusNeighborsTransformer(radius=radius, mode="distance"),
        DBSCAN(metric="precomputed", eps=radius),
    )
    est_compact = DBSCAN(eps=radius)
    labels_chain = est_chain.fit_predict(X)
    labels_compact = est_compact.fit_predict(X)
    assert_array_almost_equal(labels_chain, labels_compact)
def test_isomap():
# Test chaining KNeighborsTransformer and Isomap with
# neighbors_algorithm='precomputed'
algorithm = "auto"
n_neighbors = 10
X, _ = make_blobs(random_state=0)
X2, _ = make_blobs(random_state=1)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(
n_neighbors=n_neighbors, algorithm=algorithm, mode="distance"
),
Isomap(n_neighbors=n_neighbors, metric="precomputed"),
)
est_compact = Isomap(n_neighbors=n_neighbors, neighbors_algorithm=algorithm)
Xt_chain = est_chain.fit_transform(X)
Xt_compact = est_compact.fit_transform(X)
assert_array_almost_equal(Xt_chain, Xt_compact)
Xt_chain = est_chain.transform(X2)
Xt_compact = est_compact.transform(X2)
assert_array_almost_equal(Xt_chain, Xt_compact)
def test_tsne():
# Test chaining KNeighborsTransformer and TSNE
max_iter = 250
perplexity = 5
n_neighbors = int(3.0 * perplexity + 1)
rng = np.random.RandomState(0)
X = rng.randn(20, 2)
for metric in ["minkowski", "sqeuclidean"]:
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(
n_neighbors=n_neighbors, mode="distance", metric=metric
),
TSNE(
init="random",
metric="precomputed",
perplexity=perplexity,
method="barnes_hut",
random_state=42,
max_iter=max_iter,
),
)
est_compact = TSNE(
init="random",
metric=metric,
perplexity=perplexity,
max_iter=max_iter,
method="barnes_hut",
random_state=42,
)
Xt_chain = est_chain.fit_transform(X)
Xt_compact = est_compact.fit_transform(X)
assert_array_almost_equal(Xt_chain, Xt_compact)
def test_lof_novelty_false():
# Test chaining KNeighborsTransformer and LocalOutlierFactor
n_neighbors = 4
rng = np.random.RandomState(0)
X = rng.randn(40, 2)
# compare the chained version and the compact version
est_chain = make_pipeline(
KNeighborsTransformer(n_neighbors=n_neighbors, mode="distance"),
LocalOutlierFactor(
metric="precomputed",
n_neighbors=n_neighbors,
novelty=False,
contamination="auto",
),
)
est_compact = LocalOutlierFactor(
n_neighbors=n_neighbors, novelty=False, contamination="auto"
)
pred_chain = est_chain.fit_predict(X)
pred_compact = est_compact.fit_predict(X)
assert_array_almost_equal(pred_chain, pred_compact)
def test_lof_novelty_true():
    # Test chaining KNeighborsTransformer and LocalOutlierFactor
    # Novelty mode: fit on X1, then predict on previously unseen X2.
    n_neighbors = 4
    rng = np.random.RandomState(0)
    X1 = rng.randn(40, 2)
    X2 = rng.randn(40, 2)
    # compare the chained version and the compact version
    est_chain = make_pipeline(
        KNeighborsTransformer(n_neighbors=n_neighbors, mode="distance"),
        LocalOutlierFactor(
            metric="precomputed",
            n_neighbors=n_neighbors,
            novelty=True,
            contamination="auto",
        ),
    )
    est_compact = LocalOutlierFactor(
        n_neighbors=n_neighbors, novelty=True, contamination="auto"
    )
    pred_chain = est_chain.fit(X1).predict(X2)
    pred_compact = est_compact.fit(X1).predict(X2)
    assert_array_almost_equal(pred_chain, pred_compact)
def test_kneighbors_regressor():
    """A neighbors transformer chained into a precomputed-metric regressor
    must predict the same values as the stand-alone regressor."""
    rng = np.random.RandomState(0)
    X_train = 2 * rng.rand(40, 5) - 1
    X_test = 2 * rng.rand(40, 5) - 1
    y = rng.rand(40, 1)

    n_neighbors = 12
    radius = 1.5
    # Precompute more neighbors than strictly needed so that a k-neighbors
    # regressor can follow a radius transformer, and vice-versa.
    factor = 2

    k_trans = KNeighborsTransformer(n_neighbors=n_neighbors, mode="distance")
    k_trans_factor = KNeighborsTransformer(
        n_neighbors=int(n_neighbors * factor), mode="distance"
    )
    r_trans = RadiusNeighborsTransformer(radius=radius, mode="distance")
    r_trans_factor = RadiusNeighborsTransformer(
        radius=int(radius * factor), mode="distance"
    )

    k_reg = KNeighborsRegressor(n_neighbors=n_neighbors)
    r_reg = RadiusNeighborsRegressor(radius=radius)

    pairs = [
        (k_trans, k_reg),
        (k_trans_factor, r_reg),
        (r_trans, r_reg),
        (r_trans_factor, k_reg),
    ]
    for trans, reg in pairs:
        # Chained: transformer output feeds a precomputed-metric regressor.
        reg_precomp = clone(reg)
        reg_precomp.set_params(metric="precomputed")
        chained = make_pipeline(clone(trans), reg_precomp)
        # Compact: the regressor computes its own neighbors.
        compact = clone(reg)

        assert_array_almost_equal(
            chained.fit(X_train, y).predict(X_test),
            compact.fit(X_train, y).predict(X_test),
        )
# ---------------------------------------------------------------------------
# (export artifact: "View File" / "@@ -0,0 +1,296 @@" removed)
# Next file: tests for sklearn.neighbors._ball_tree and _kd_tree
# ---------------------------------------------------------------------------
# SPDX-License-Identifier: BSD-3-Clause
import itertools
import pickle
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_almost_equal
from sklearn.metrics import DistanceMetric
from sklearn.neighbors._ball_tree import (
BallTree,
kernel_norm,
)
from sklearn.neighbors._ball_tree import (
NeighborsHeap64 as NeighborsHeapBT,
)
from sklearn.neighbors._ball_tree import (
nodeheap_sort as nodeheap_sort_bt,
)
from sklearn.neighbors._ball_tree import (
simultaneous_sort as simultaneous_sort_bt,
)
from sklearn.neighbors._kd_tree import (
KDTree,
)
from sklearn.neighbors._kd_tree import (
NeighborsHeap64 as NeighborsHeapKDT,
)
from sklearn.neighbors._kd_tree import (
nodeheap_sort as nodeheap_sort_kdt,
)
from sklearn.neighbors._kd_tree import (
simultaneous_sort as simultaneous_sort_kdt,
)
from sklearn.utils import check_random_state
# Fixed seed so the random Mahalanobis covariance below is reproducible.
rng = np.random.RandomState(42)
V_mahalanobis = rng.rand(3, 3)
# Symmetrize: V @ V.T is symmetric positive semi-definite, as the
# Mahalanobis metric requires.
V_mahalanobis = np.dot(V_mahalanobis, V_mahalanobis.T)
# Feature dimension used by the query tests below.
DIMENSION = 3
# Metric name -> extra keyword arguments passed to the tree / DistanceMetric.
METRICS = {
    "euclidean": {},
    "manhattan": {},
    "minkowski": dict(p=3),
    "chebyshev": {},
    "seuclidean": dict(V=rng.random_sample(DIMENSION)),
    "mahalanobis": dict(V=V_mahalanobis),
}
# Subset of METRICS exercised with KDTree; BallTree is tested with all of them.
KD_TREE_METRICS = ["euclidean", "manhattan", "chebyshev", "minkowski"]
BALL_TREE_METRICS = list(METRICS)
def dist_func(x1, x2, p):
    """Minkowski p-distance between two vectors (used as a callable metric)."""
    delta = x1 - x2
    return (delta**p).sum() ** (1.0 / p)
def compute_kernel_slow(Y, X, kernel, h):
    """Brute-force reference for kernel density evaluation.

    For every query row in ``Y``, sums the kernel evaluated at the Euclidean
    distance to each training point in ``X`` and applies the normalization
    from :func:`kernel_norm`.  Raises ``ValueError`` for unknown kernels.
    """
    # Pairwise Euclidean distances, shape (n_queries, n_train).
    d = np.sqrt(((Y[:, None, :] - X) ** 2).sum(-1))
    norm = kernel_norm(h, X.shape[1], kernel)
    within = d < h  # support indicator for compactly-supported kernels
    if kernel == "gaussian":
        total = np.exp(-0.5 * (d * d) / (h * h)).sum(-1)
    elif kernel == "tophat":
        total = within.sum(-1)
    elif kernel == "epanechnikov":
        total = ((1.0 - (d * d) / (h * h)) * within).sum(-1)
    elif kernel == "exponential":
        total = (np.exp(-d / h)).sum(-1)
    elif kernel == "linear":
        total = ((1 - d / h) * within).sum(-1)
    elif kernel == "cosine":
        total = (np.cos(0.5 * np.pi * d / h) * within).sum(-1)
    else:
        raise ValueError("kernel not recognized")
    return norm * total
def brute_force_neighbors(X, Y, k, metric, **kwargs):
    """Reference k-NN: full pairwise distance matrix, then argsort per row.

    Returns ``(dist, ind)`` arrays of shape ``(len(Y), k)`` with the k nearest
    training points in ``X`` for every query row in ``Y``.
    """
    pairwise = DistanceMetric.get_metric(metric, **kwargs).pairwise(Y, X)
    ind = np.argsort(pairwise, axis=1)[:, :k]
    rows = np.arange(Y.shape[0])[:, None]
    return pairwise[rows, ind], ind
@pytest.mark.parametrize("Cls", [KDTree, BallTree])
@pytest.mark.parametrize(
    "kernel", ["gaussian", "tophat", "epanechnikov", "exponential", "linear", "cosine"]
)
@pytest.mark.parametrize("h", [0.01, 0.1, 1])
@pytest.mark.parametrize("rtol", [0, 1e-5])
@pytest.mark.parametrize("atol", [1e-6, 1e-2])
@pytest.mark.parametrize("breadth_first", [True, False])
def test_kernel_density(
    Cls, kernel, h, rtol, atol, breadth_first, n_samples=100, n_features=3
):
    """Tree kernel_density must match the brute-force reference within
    the requested tolerances."""
    rng = check_random_state(1)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))

    expected = compute_kernel_slow(Y, X, kernel, h)
    actual = Cls(X, leaf_size=10).kernel_density(
        Y, h, atol=atol, rtol=rtol, kernel=kernel, breadth_first=breadth_first
    )
    # Use a slightly looser rtol floor to absorb floating-point roundoff.
    assert_allclose(actual, expected, atol=atol, rtol=max(rtol, 1e-7))
@pytest.mark.parametrize("Cls", [KDTree, BallTree])
def test_neighbor_tree_query_radius(Cls, n_samples=100, n_features=10):
    """query_radius must return exactly the points within r of the query."""
    rng = check_random_state(0)
    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)
    eps = 1e-15  # guard against roundoff exactly at the radius boundary

    tree = Cls(X, leaf_size=5)
    true_dist = np.sqrt(((X - origin) ** 2).sum(1))

    for r in np.linspace(true_dist[0], true_dist[-1], 100):
        found = np.sort(tree.query_radius([origin], r + eps)[0])
        expected = np.sort(np.where(true_dist <= r + eps)[0])
        assert_array_almost_equal(expected, found)
@pytest.mark.parametrize("Cls", [KDTree, BallTree])
def test_neighbor_tree_query_radius_distance(Cls, n_samples=100, n_features=10):
    """Distances returned by query_radius must match recomputed distances."""
    rng = check_random_state(0)
    X = 2 * rng.random_sample(size=(n_samples, n_features)) - 1
    origin = np.zeros(n_features, dtype=float)
    eps = 1e-15  # guard against roundoff exactly at the radius boundary

    tree = Cls(X, leaf_size=5)
    true_dist = np.sqrt(((X - origin) ** 2).sum(1))

    for r in np.linspace(true_dist[0], true_dist[-1], 100):
        ind, dist = tree.query_radius([origin], r + eps, return_distance=True)
        # Recompute Euclidean distances for the returned indices.
        recomputed = np.sqrt(((origin - X[ind[0]]) ** 2).sum(1))
        assert_array_almost_equal(recomputed, dist[0])
@pytest.mark.parametrize("Cls", [KDTree, BallTree])
@pytest.mark.parametrize("dualtree", (True, False))
def test_neighbor_tree_two_point(Cls, dualtree, n_samples=100, n_features=3):
    """two_point_correlation must count pairs within each radius correctly."""
    rng = check_random_state(0)
    X = rng.random_sample((n_samples, n_features))
    Y = rng.random_sample((n_samples, n_features))
    radii = np.linspace(0, 1, 10)

    # Reference: count (y, x) pairs within each radius via the full matrix.
    D = DistanceMetric.get_metric("euclidean").pairwise(Y, X)
    expected = [(D <= radius).sum() for radius in radii]

    counts = Cls(X, leaf_size=10).two_point_correlation(Y, r=radii, dualtree=dualtree)
    assert_array_almost_equal(counts, expected)
@pytest.mark.parametrize("NeighborsHeap", [NeighborsHeapBT, NeighborsHeapKDT])
def test_neighbors_heap(NeighborsHeap, n_pts=5, n_nbrs=10):
    """Pushing 2*k candidates per row must keep the k smallest, sorted."""
    heap = NeighborsHeap(n_pts, n_nbrs)
    rng = check_random_state(0)

    for row in range(n_pts):
        dists = rng.random_sample(2 * n_nbrs).astype(np.float64, copy=False)
        idx = np.arange(2 * n_nbrs, dtype=np.intp)
        for d, i in zip(dists, idx):
            heap.push(row, d, i)

        # Reference ordering via numpy.
        order = np.argsort(dists)
        d_heap, i_heap = heap.get_arrays(sort=True)
        assert_array_almost_equal(dists[order][:n_nbrs], d_heap[row])
        assert_array_almost_equal(idx[order][:n_nbrs], i_heap[row])
@pytest.mark.parametrize("nodeheap_sort", [nodeheap_sort_bt, nodeheap_sort_kdt])
def test_node_heap(nodeheap_sort, n_nodes=50):
    """nodeheap_sort must agree with numpy argsort on random values."""
    rng = check_random_state(0)
    vals = rng.random_sample(n_nodes).astype(np.float64, copy=False)

    expected_order = np.argsort(vals)
    sorted_vals, order = nodeheap_sort(vals)

    assert_array_almost_equal(expected_order, order)
    assert_array_almost_equal(vals[expected_order], sorted_vals)
@pytest.mark.parametrize(
    "simultaneous_sort", [simultaneous_sort_bt, simultaneous_sort_kdt]
)
def test_simultaneous_sort(simultaneous_sort, n_rows=10, n_pts=201):
    """In-place simultaneous row sort must match numpy's fancy-indexed sort."""
    rng = check_random_state(0)
    dist = rng.random_sample((n_rows, n_pts)).astype(np.float64, copy=False)
    ind = (np.arange(n_pts) + np.zeros((n_rows, 1))).astype(np.intp, copy=False)

    dist_ref = dist.copy()
    ind_ref = ind.copy()

    # Sort each row of dist in place, permuting ind identically.
    simultaneous_sort(dist, ind)

    # numpy reference: argsort each row and apply the permutation to both.
    order = np.argsort(dist_ref, axis=1)
    rows = np.arange(n_rows)[:, None]
    assert_array_almost_equal(dist, dist_ref[rows, order])
    assert_array_almost_equal(ind, ind_ref[rows, order])
@pytest.mark.parametrize("Cls", [KDTree, BallTree])
def test_gaussian_kde(Cls, n_samples=1000):
    """Tree-based gaussian KDE should agree with scipy.stats.gaussian_kde."""
    from scipy.stats import gaussian_kde

    rng = check_random_state(0)
    x_train = rng.normal(0, 1, n_samples)
    x_eval = np.linspace(-5, 5, 30)

    for h in [0.01, 0.1, 1]:
        tree = Cls(x_train[:, None])
        # scipy's bandwidth is relative to the data std; rescale to match h.
        reference = gaussian_kde(x_train, bw_method=h / np.std(x_train))

        dens_tree = tree.kernel_density(x_eval[:, None], h) / n_samples
        assert_array_almost_equal(dens_tree, reference.evaluate(x_eval), decimal=3)
@pytest.mark.parametrize(
    "Cls, metric",
    itertools.chain(
        [(KDTree, metric) for metric in KD_TREE_METRICS],
        [(BallTree, metric) for metric in BALL_TREE_METRICS],
    ),
)
@pytest.mark.parametrize("k", (1, 3, 5))
@pytest.mark.parametrize("dualtree", (True, False))
@pytest.mark.parametrize("breadth_first", (True, False))
def test_nn_tree_query(Cls, metric, k, dualtree, breadth_first):
    """Tree queries must return the same neighbor distances as brute force."""
    rng = check_random_state(0)
    X = rng.random_sample((40, DIMENSION))
    Y = rng.random_sample((10, DIMENSION))

    metric_kwargs = METRICS[metric]
    tree = Cls(X, leaf_size=1, metric=metric, **metric_kwargs)
    dist_tree, _ = tree.query(Y, k, dualtree=dualtree, breadth_first=breadth_first)
    dist_brute, _ = brute_force_neighbors(X, Y, k, metric, **metric_kwargs)

    # Indices are deliberately not compared: duplicate distances may be
    # ordered differently by the two methods. Distances must still agree.
    assert_array_almost_equal(dist_tree, dist_brute)
@pytest.mark.parametrize(
    "Cls, metric",
    [(KDTree, "euclidean"), (BallTree, "euclidean"), (BallTree, dist_func)],
)
@pytest.mark.parametrize("protocol", (0, 1, 2))
def test_pickle(Cls, metric, protocol):
    """A pickled tree must round-trip and answer queries identically."""
    rng = check_random_state(0)
    X = rng.random_sample((10, 3))

    # The callable metric (dist_func) needs its Minkowski exponent.
    kwargs = {"p": 2} if callable(metric) else {}

    original = Cls(X, leaf_size=1, metric=metric, **kwargs)
    dist_before, ind_before = original.query(X)

    restored = pickle.loads(pickle.dumps(original, protocol=protocol))
    dist_after, ind_after = restored.query(X)

    assert_array_almost_equal(dist_before, dist_after)
    assert_array_almost_equal(ind_before, ind_after)
    assert isinstance(restored, Cls)
# ---------------------------------------------------------------------------
# (export artifact: "View File" / "@@ -0,0 +1,144 @@" removed)
# Next file: tests for sklearn.neighbors._quad_tree
# ---------------------------------------------------------------------------
import pickle
import numpy as np
import pytest
from sklearn.neighbors._quad_tree import _QuadTree
from sklearn.utils import check_random_state
def test_quadtree_boundary_computation():
    """Tree boundaries must stay coherent for awkward coordinate ranges."""
    cases = [
        # generic mixed-sign data
        np.array([[-1, 1], [-4, -1]], dtype=np.float32),
        # all-zero input
        np.array([[0, 0], [0, 0]], dtype=np.float32),
        # only non-positive coordinates
        np.array([[-1, -2], [-4, 0]], dtype=np.float32),
        # tiny magnitudes near zero
        np.array([[-1e-6, 1e-6], [-4e-6, -1e-6]], dtype=np.float32),
    ]
    for X in cases:
        tree = _QuadTree(n_dimensions=2, verbose=0)
        tree.build_tree(X)
        tree._check_coherence()
def test_quadtree_similar_point():
    """Inserting near-duplicate points must terminate and stay coherent.

    A regression in duplicate handling would make this test hang rather
    than fail.
    """
    cases = [
        # clearly distinct points
        np.array([[1, 2], [3, 4]], dtype=np.float32),
        # identical x coordinate
        np.array([[1.0, 2.0], [1.0, 3.0]], dtype=np.float32),
        # nearly identical x coordinate
        np.array([[1.00001, 2.0], [1.00002, 3.0]], dtype=np.float32),
        # identical y coordinate
        np.array([[1.0, 2.0], [3.0, 2.0]], dtype=np.float32),
        # nearly identical y coordinate
        np.array([[1.0, 2.00001], [3.0, 2.00002]], dtype=np.float32),
        # nearly identical on both axes
        np.array([[1.00001, 2.00001], [1.00002, 2.00002]], dtype=np.float32),
        # one-coordinate separation close to machine epsilon (second column)
        np.array([[1, 0.0003817754041], [2, 0.0003817753750]], dtype=np.float32),
        # one-coordinate separation close to machine epsilon (first column)
        np.array([[0.0003817754041, 1.0], [0.0003817753750, 2.0]], dtype=np.float32),
    ]
    for X in cases:
        tree = _QuadTree(n_dimensions=2, verbose=0)
        tree.build_tree(X)
        tree._check_coherence()
@pytest.mark.parametrize("n_dimensions", (2, 3))
@pytest.mark.parametrize("protocol", (0, 1, 2))
def test_quad_tree_pickle(n_dimensions, protocol):
    """A pickled quad tree must map every point to the same cell."""
    rng = check_random_state(0)
    X = rng.random_sample((10, n_dimensions))

    tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
    tree.build_tree(X)

    restored = pickle.loads(pickle.dumps(tree, protocol=protocol))
    for point in X:
        assert tree.get_cell(point) == restored.get_cell(point)
@pytest.mark.parametrize("n_dimensions", (2, 3))
def test_qt_insert_duplicate(n_dimensions):
    """Duplicated points must accumulate in one leaf instead of splitting."""
    rng = check_random_state(0)
    X = rng.random_sample((10, n_dimensions))
    # Re-insert the first five points a second time.
    X_with_dups = np.r_[X, X[:5]]

    tree = _QuadTree(n_dimensions=n_dimensions, verbose=0)
    tree.build_tree(X_with_dups)

    sizes = tree.cumulative_size
    is_leaf = tree.leafs

    for i, point in enumerate(X):
        cell_id = tree.get_cell(point)
        assert is_leaf[cell_id]
        # The first five points occur twice, the remaining ones once.
        expected = 2 if i < 5 else 1
        assert sizes[cell_id] == expected
def test_summarize():
    # Simple check for quad tree's summarize
    angle = 0.9
    # One far-away query point and a tight cluster of three points.
    X = np.array(
        [[-10.0, -10.0], [9.0, 10.0], [10.0, 9.0], [10.0, 10.0]], dtype=np.float32
    )
    query_pt = X[0, :]
    n_dimensions = X.shape[1]
    # Each summarized node occupies (n_dimensions + 2) slots in the flat
    # summary buffer; the slot at [n_dimensions] is compared against a squared
    # distance and the one at [n_dimensions + 1] against a node size below.
    offset = n_dimensions + 2
    qt = _QuadTree(n_dimensions, verbose=0)
    qt.build_tree(X)
    # With a wide opening angle the cluster is summarized as a single node.
    idx, summary = qt._py_summarize(query_pt, X, angle)
    node_dist = summary[n_dimensions]
    node_size = summary[n_dimensions + 1]
    # Summary should contain only 1 node with size 3 and distance to
    # X[1:] barycenter
    barycenter = X[1:].mean(axis=0)
    ds2c = ((X[0] - barycenter) ** 2).sum()  # squared distance to barycenter
    assert idx == offset
    assert node_size == 3, "summary size = {}".format(node_size)
    assert np.isclose(node_dist, ds2c)
    # Summary should contain all 3 node with size 1 and distance to
    # each point in X[1:] for ``angle=0``
    idx, summary = qt._py_summarize(query_pt, X, 0.0)
    barycenter = X[1:].mean(axis=0)
    ds2c = ((X[0] - barycenter) ** 2).sum()
    # Three nodes were written, so the buffer index advanced by 3 * offset.
    assert idx == 3 * (offset)
    for i in range(3):
        # Slots for the i-th summarized node within the flat buffer.
        node_dist = summary[i * offset + n_dimensions]
        node_size = summary[i * offset + n_dimensions + 1]
        ds2c = ((X[0] - X[i + 1]) ** 2).sum()
        assert node_size == 1, "summary size = {}".format(node_size)
        assert np.isclose(node_dist, ds2c)