Add README

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
"""
Common utilities for testing model selection.
"""
import numpy as np
from sklearn.model_selection import KFold
class OneTimeSplitter:
    """Single-use cross-validation iterator wrapping a ``KFold`` split.

    The fold indices are stored as an iterator, so ``split`` can be
    consumed exactly once; any later call yields nothing. Useful for
    testing that estimators do not iterate a cv object twice.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        # Materialize the KFold generator eagerly as a one-shot iterator.
        self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        """Split can be called only once"""
        yield from self.indices

    def get_n_splits(self, X=None, y=None, groups=None):
        # Number of folds configured at construction time.
        return self.n_splits

View File

@@ -0,0 +1,618 @@
import numpy as np
import pytest
from sklearn import config_context
from sklearn.base import clone
from sklearn.datasets import (
load_breast_cancer,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
balanced_accuracy_score,
f1_score,
fbeta_score,
make_scorer,
)
from sklearn.metrics._scorer import _CurveScorer
from sklearn.model_selection import (
FixedThresholdClassifier,
StratifiedShuffleSplit,
TunedThresholdClassifierCV,
)
from sklearn.model_selection._classification_threshold import (
_fit_and_score_over_thresholds,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
def test_fit_and_score_over_thresholds_curve_scorers():
    """Check that `_fit_and_score_over_thresholds` returns thresholds in ascending order
    for the different accepted curve scorers."""
    X, y = make_classification(n_samples=100, random_state=0)
    # disjoint train/validation split over the 100 samples
    train_idx, val_idx = np.arange(50), np.arange(50, 100)
    classifier = LogisticRegression()
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    # thresholds must come back sorted in ascending order
    assert np.all(thresholds[:-1] <= thresholds[1:])
    # balanced accuracy scores are bounded in [0, 1]
    assert isinstance(scores, np.ndarray)
    assert np.logical_and(scores >= 0, scores <= 1).all()
def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)
    # `train_idx is None` to indicate that the classifier is prefit
    train_idx, val_idx = None, np.arange(50, 100)
    classifier = DecisionTreeClassifier(random_state=0).fit(X, y)
    # make sure that the classifier memorized the full dataset such that
    # we get perfect predictions and thus match the expected score
    assert classifier.score(X[val_idx], y[val_idx]) == pytest.approx(1.0)
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    assert np.all(thresholds[:-1] <= thresholds[1:])
    # with 2 candidate thresholds on perfect predictions, the extreme
    # thresholds give chance level (0.5) and a perfect score (1.0)
    assert_allclose(scores, [0.5, 1.0])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample-weight to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # only 2 classes
    # create a dataset and repeat twice the sample of class #0
    X_repeated, y_repeated = np.vstack([X, X[y == 0]]), np.hstack([y, y[y == 0]])
    # create a sample weight vector that is equivalent to the repeated dataset
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2
    classifier = LogisticRegression()
    train_repeated_idx = np.arange(X_repeated.shape[0])
    val_repeated_idx = np.arange(X_repeated.shape[0])
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # reference run: class #0 samples physically duplicated
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        classifier,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=train_repeated_idx,
        val_idx=val_repeated_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    train_idx, val_idx = np.arange(X.shape[0]), np.arange(X.shape[0])
    # weighted run: the same duplication expressed through `sample_weight`,
    # routed to both `fit` and the scorer via metadata routing
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier.set_fit_request(sample_weight=True),
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer.set_score_request(sample_weight=True),
        score_params={"sample_weight": sample_weight},
    )
    # both parameterizations must be strictly equivalent
    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {
        "a": _convert_container(y, fit_params_type),
        "b": _convert_container(y, fit_params_type),
    }
    # CheckingClassifier raises at fit time if "a"/"b" are not received
    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)
    train_idx, val_idx = np.arange(50), np.arange(50, 100)
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    # no assertion needed: CheckingClassifier fails internally on bad routing
    _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params=fit_params,
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for non-binary problem."""
    model = TunedThresholdClassifierCV(LogisticRegression())
    with pytest.raises(ValueError, match="Only binary classification is supported."):
        model.fit(*data)
# Each invalid `cv`/`refit` combination is paired with the exact error it must raise.
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    with pytest.raises(err_type, match=err_msg):
        TunedThresholdClassifierCV(LogisticRegression(), **params).fit(X, y)
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that `TunedThresholdClassifierCV` exposes the same response methods as the
    underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    model = ThresholdClassifier(estimator=estimator)
    # availability must match both before and after fitting
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    if hasattr(model, response_method):
        # the wrapper must delegate to the fitted inner estimator unchanged
        y_pred_cutoff = getattr(model, response_method)(X)
        y_pred_underlying_estimator = getattr(model.estimator_, response_method)(X)
        assert_allclose(y_pred_cutoff, y_pred_underlying_estimator)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that `TunedThresholdClassifierCV` is optimizing a given objective
    metric."""
    X, y = load_breast_cancer(return_X_y=True)
    # remove feature to degrade performances
    X = X[:, :5]
    # make the problem completely imbalanced such that the balanced accuracy is low
    indices_pos = np.flatnonzero(y == 1)
    indices_pos = indices_pos[: indices_pos.size // 50]
    indices_neg = np.flatnonzero(y == 0)
    X = np.vstack([X[indices_neg], X[indices_pos]])
    y = np.hstack([y[indices_neg], y[indices_pos]])
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    thresholds = 100
    model = TunedThresholdClassifierCV(
        estimator=lr,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=thresholds,
        store_cv_results=True,
    )
    # tuning the threshold must beat the default 0.5/0.0 cut-off baseline
    score_optimized = balanced_accuracy_score(y, model.fit(X, y).predict(X))
    score_baseline = balanced_accuracy_score(y, lr.predict(X))
    assert score_optimized > score_baseline
    # cv_results_ stores one score per candidate threshold
    assert model.cv_results_["thresholds"].shape == (thresholds,)
    assert model.cv_results_["scores"].shape == (thresholds,)
def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that we can pass a metric with a parameter in addition check that
    `f_beta` with `beta=1` is equivalent to `f1` and different from `f_beta` with
    `beta=2`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    model_fbeta_1 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(fbeta_score, beta=1)
    ).fit(X, y)
    model_fbeta_2 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(fbeta_score, beta=2)
    ).fit(X, y)
    model_f1 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(f1_score)
    ).fit(X, y)
    # fbeta(beta=1) is mathematically f1, so the tuned thresholds must match;
    # beta=2 optimizes a different trade-off and must land elsewhere
    assert model_fbeta_1.best_threshold_ == pytest.approx(model_f1.best_threshold_)
    assert model_fbeta_1.best_threshold_ != pytest.approx(model_fbeta_2.best_threshold_)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that targets represented by str are properly managed.
    Also, check with several metrics to be sure that `pos_label` is properly
    dispatched.
    """
    X, y = load_breast_cancer(return_X_y=True)
    # Encode numeric targets by meaningful strings. We purposely designed the class
    # names such that the `pos_label` is the first alphabetically sorted class and thus
    # encoded as 0.
    classes = np.array(["cancer", "healthy"], dtype=object)
    y = classes[y]
    model = TunedThresholdClassifierCV(
        estimator=make_pipeline(StandardScaler(), LogisticRegression()),
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    ).fit(X, y)
    # classes_ follows the sorted-label convention and predictions stay strings
    assert_array_equal(model.classes_, np.sort(classes))
    y_pred = model.predict(X)
    assert_array_equal(np.unique(y_pred), np.sort(classes))
@pytest.mark.parametrize("with_sample_weight", [True, False])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check the behaviour of the `refit` parameter."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    if with_sample_weight:
        # non-negative random weights, computed in place
        sample_weight = rng.randn(X.shape[0])
        sample_weight = np.abs(sample_weight, out=sample_weight)
    else:
        sample_weight = None
    # check that `estimator_` is fitted on the full dataset when `refit=True`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)
    # check that `estimator_` was not altered when `refit=False` and `cv="prefit"`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    # prefit: the exact same object is kept, coefficients untouched
    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef)
    # check that we train `estimator_` on the training split of a given
    # cross-validation
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [
        (np.arange(50), np.arange(50, 100)),
    ]  # single split
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    if with_sample_weight:
        # only the training-fold weights should have been used
        sw_train = sample_weight[cv[0][0]]
    else:
        sw_train = None
    estimator.fit(X[cv[0][0]], y[cv[0][0]], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # CheckingClassifier verifies internally that "a" and "b" reach its fit.
    fit_params = {key: _convert_container(y, fit_params_type) for key in ("a", "b")}
    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)
    TunedThresholdClassifierCV(classifier).fit(X, y, **fit_params)
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that removing some samples from the dataset `X` is
    equivalent to passing a `sample_weight` of 0 for those samples."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use 2 classes and select samples such that 2-fold cross-validation
    # split will lead to an equivalence with a `sample_weight` of 0
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    # every odd sample gets weight 0, every even sample weight 1
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)
    model_with_weights.fit(X, y, sample_weight=sample_weight)
    # fitting on the even-index subsample must give the same model
    model_without_weights.fit(X[::2], y[::2])
    assert_allclose(
        model_with_weights.estimator_.coef_, model_without_weights.estimator_.coef_
    )
    y_pred_with_weights = model_with_weights.predict_proba(X)
    y_pred_without_weights = model_without_weights.predict_proba(X)
    assert_allclose(y_pred_with_weights, y_pred_without_weights)
def test_tuned_threshold_classifier_thresholds_array():
    """Check that we can pass an array to `thresholds` and it is used as candidate
    threshold internally."""
    X, y = make_classification(random_state=0)
    candidate_thresholds = np.linspace(0, 1, 11)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(),
        thresholds=candidate_thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    )
    tuned_model.fit(X, y)
    # the stored grid must be exactly the user-provided candidates
    assert_allclose(tuned_model.cv_results_["thresholds"], candidate_thresholds)
@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that if `cv_results_` exists depending on `store_cv_results`."""
    X, y = make_classification(random_state=0)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(), store_cv_results=store_cv_results
    ).fit(X, y)
    # cv_results_ is present exactly when storing was requested
    assert hasattr(tuned_model, "cv_results_") == store_cv_results
def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float."""
    X, y = make_classification(random_state=0)
    # case where `refit=False` and cv is a float: the underlying estimator will be fit
    # on the training set given by a ShuffleSplit. We check that we get the same model
    # coefficients.
    test_size = 0.3
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)
    # NOTE: a redundant second `tuned_model.fit(X, y)` call was removed here;
    # the constructor chain above already fits the model.
    cv = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    train_idx, val_idx = next(cv.split(X, y))
    cloned_estimator = clone(estimator).fit(X[train_idx], y[train_idx])
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
    # case where `refit=True`, then the underlying estimator is fitted on the full
    # dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    cloned_estimator = clone(estimator).fit(X, y)
    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that we raise a ValueError if the underlying classifier returns constant
    probabilities such that we cannot find any threshold.
    """
    X, y = make_classification(random_state=0)
    # DummyClassifier with a constant strategy yields identical probabilities
    # for every sample, leaving no usable threshold grid.
    constant_clf = DummyClassifier(strategy="constant", constant=1)
    tuned_model = TunedThresholdClassifierCV(
        constant_clf, response_method="predict_proba"
    )
    with pytest.raises(
        ValueError, match="The provided estimator makes constant predictions"
    ):
        tuned_model.fit(X, y)
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check that `FixedThresholdClassifier` has the same behaviour as the vanilla
    classifier.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    classifier_default_threshold = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    classifier_default_threshold.fit(X, y)
    # emulate the response method that should take into account the `pos_label`
    if response_method in ("auto", "predict_proba"):
        # probabilities are thresholded at the conventional 0.5
        y_score = classifier_default_threshold.predict_proba(X)[:, 1]
        threshold = 0.5
    else:  # response_method == "decision_function"
        # decision scores are thresholded at 0.0
        y_score = classifier_default_threshold.decision_function(X)
        threshold = 0.0
    y_pred_lr = (y_score >= threshold).astype(int)
    assert_allclose(classifier_default_threshold.predict(X), y_pred_lr)
@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that applying `predict` lead to the same prediction as applying the
    threshold to the output of the response method.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    ).fit(X, y)
    # check that the underlying estimator is the same
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)
    # emulate the response method that should take into account the `pos_label`
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        # decision_function scores the positive class; flip sign for pos_label=0
        y_score = model.decision_function(X)
        y_score = y_score if pos_label == 1 else -y_score
    # create a mapping from boolean values to class labels
    map_to_label = np.array([0, 1]) if pos_label == 1 else np.array([1, 0])
    y_pred_lr = map_to_label[(y_score >= threshold).astype(int)]
    assert_allclose(model.predict(X), y_pred_lr)
    # all soft outputs must be untouched by the threshold wrapper
    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        assert_allclose(
            getattr(model, method)(X), getattr(logistic_regression, method)(X)
        )
        assert_allclose(
            getattr(model.estimator_, method)(X),
            getattr(logistic_regression, method)(X),
        )
@config_context(enable_metadata_routing=True)
def test_fixed_threshold_classifier_metadata_routing():
    """Check that everything works with metadata routing."""
    X, y = make_classification(random_state=0)
    # alternate weights: every even sample counts double
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2
    reference_clf = LogisticRegression().set_fit_request(sample_weight=True)
    reference_clf.fit(X, y, sample_weight=sample_weight)
    wrapped = FixedThresholdClassifier(estimator=clone(reference_clf))
    wrapped.fit(X, y, sample_weight=sample_weight)
    # the routed weights must produce the same fitted coefficients
    assert_allclose(wrapped.estimator_.coef_, reference_clf.coef_)
@pytest.mark.parametrize(
    "method", ["predict_proba", "decision_function", "predict", "predict_log_proba"]
)
def test_fixed_threshold_classifier_fitted_estimator(method):
    """Check that if the underlying estimator is already fitted, no fit is required."""
    X, y = make_classification(random_state=0)
    prefit_clf = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(estimator=prefit_clf)
    # Calling a response method without fitting the wrapper must not raise.
    getattr(model, method)(X)
def test_fixed_threshold_classifier_classes_():
    """Check that the classes_ attribute is properly set."""
    X, y = make_classification(random_state=0)
    # accessing classes_ before the inner estimator is fitted must raise
    unfitted = FixedThresholdClassifier(estimator=LogisticRegression())
    with pytest.raises(
        AttributeError, match="The underlying estimator is not fitted yet."
    ):
        unfitted.classes_
    prefit_clf = LogisticRegression().fit(X, y)
    wrapped = FixedThresholdClassifier(estimator=prefit_clf)
    # classes_ is forwarded from the fitted inner estimator
    assert_array_equal(wrapped.classes_, prefit_clf.classes_)

View File

@@ -0,0 +1,572 @@
import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.model_selection import (
LearningCurveDisplay,
ValidationCurveDisplay,
learning_curve,
validation_curve,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_allclose, assert_array_equal
@pytest.fixture
def data():
    # Iris features/targets, shuffled deterministically for stable splits.
    X, y = load_iris(return_X_y=True)
    return shuffle(X, y, random_state=0)
# invalid display options paired with the error each must trigger
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
        ({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
    ],
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_parameters_validation(
    pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
):
    """Check that we raise a proper error when passing invalid parameters."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    with pytest.raises(err_type, match=err_msg):
        CurveDisplay.from_estimator(estimator, X, y, **specific_params, **params)
def test_learning_curve_display_default_usage(pyplot, data):
    """Check the default usage of the LearningCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    display = LearningCurveDisplay.from_estimator(
        estimator, X, y, train_sizes=train_sizes
    )
    import matplotlib as mpl

    # default style: plain lines with std fill-between, no errorbars
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # default axis labelling and legend entries
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == "Number of samples in the training set"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # displayed data must match a direct call to `learning_curve`
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    assert_array_equal(display.train_sizes, train_sizes_abs)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
def test_validation_curve_display_default_usage(pyplot, data):
    """Check the default usage of the ValidationCurveDisplay class."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    display = ValidationCurveDisplay.from_estimator(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    import matplotlib as mpl

    # default style: plain lines with std fill-between, no errorbars
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert isinstance(display.fill_between_, list)
    for fill in display.fill_between_:
        assert isinstance(fill, mpl.collections.PolyCollection)
        assert fill.get_alpha() == 0.5
    # x-axis is labelled with the swept parameter name
    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == f"{param_name}"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]
    # displayed data must match a direct call to `validation_curve`
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    assert_array_equal(display.param_range, param_range)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the `negate_score` parameter calling `from_estimator` and
    `plot`.
    """
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    # default behaviour: plotted scores are non-negative
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    positive_scores = display.lines_[0].get_data()[1]
    assert (positive_scores >= 0).all()
    assert display.ax_.get_ylabel() == "Score"
    # negating flips the sign of the plotted scores and relabels the axis
    negate_score = True
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    negative_scores = display.lines_[0].get_data()[1]
    assert (negative_scores <= 0).all()
    assert_allclose(negative_scores, -positive_scores)
    assert display.ax_.get_ylabel() == "Negative score"
    # re-plotting with `plot(negate_score=True)` flips the data but keeps the
    # ylabel set at `from_estimator` time
    negate_score = False
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, negate_score=negate_score
    )
    assert display.ax_.get_ylabel() == "Score"
    display.plot(negate_score=not negate_score)
    assert display.ax_.get_ylabel() == "Score"
    assert (display.lines_[0].get_data()[1] < 0).all()
@pytest.mark.parametrize(
    "score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_score_name(
    pyplot, data, score_name, ylabel, CurveDisplay, specific_params
):
    """Check that we can overwrite the default score name shown on the y-axis."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the y-axis label reflects the requested score name (default: "Score")
    assert display.ax_.get_ylabel() == ylabel
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )
    # the resolved name is also stored on the display object itself
    assert display.score_name == ylabel
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    # reference values to compare the plotted data against
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    # score_type="train": only the train curve is drawn
    score_type = "train"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        # errorbar style populates `errorbar_` instead of `lines_`
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is drawn
    score_type = "test"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are drawn together
    score_type = "both"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, train_sizes_abs)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, train_sizes_abs)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    # reference values to compare the plotted data against
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    # score_type="train": only the train curve is drawn
    score_type = "train"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        # errorbar style populates `errorbar_` instead of `lines_`
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, train_scores.mean(axis=1))
    # score_type="test": only the test curve is drawn
    score_type = "test"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]
    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()
    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, test_scores.mean(axis=1))
    # score_type="both": train and test curves are drawn together
    score_type = "both"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]
    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
    assert_array_equal(x_data_train, param_range)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, param_range)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize(
    "CurveDisplay, specific_params, expected_xscale",
    [
        (
            ValidationCurveDisplay,
            {"param_name": "max_depth", "param_range": np.arange(1, 5)},
            "linear",
        ),
        (LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
        (
            ValidationCurveDisplay,
            {
                "param_name": "max_depth",
                "param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
            },
            "log",
        ),
        (LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
    ],
)
def test_curve_display_xscale_auto(
    pyplot, data, CurveDisplay, specific_params, expected_xscale
):
    """Check that the x-axis scale is inferred from the provided range values."""
    features, target = data
    tree = DecisionTreeClassifier(random_state=0)
    disp = CurveDisplay.from_estimator(tree, features, target, **specific_params)
    assert disp.ax_.get_xscale() == expected_xscale
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the parameter `std_display_style`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    import matplotlib as mpl
    # `std_display_style=None`: only the two mean curves are drawn, with no
    # error bars and no shaded bands
    std_display_style = None
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )
    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2
    # `std_display_style="fill_between"`: mean curves plus one shaded
    # PolyCollection band per curve, still no error bars
    std_display_style = "fill_between"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )
    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert len(display.fill_between_) == 2
    for fill_between in display.fill_between_:
        assert isinstance(fill_between, mpl.collections.PolyCollection)
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2
    # `std_display_style="errorbar"`: error-bar containers replace the plain
    # lines entirely; no shaded bands
    std_display_style = "errorbar"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )
    assert display.lines_ is None
    assert len(display.errorbar_) == 2
    for errorbar in display.errorbar_:
        assert isinstance(errorbar, mpl.container.ErrorbarContainer)
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the different plotting keyword arguments: `line_kw`,
    `fill_between_kw`, and `errorbar_kw`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    # `line_kw` and `fill_between_kw` must be forwarded to the line and the
    # shaded band respectively
    std_display_style = "fill_between"
    line_kw = {"color": "red"}
    fill_between_kw = {"color": "red", "alpha": 1.0}
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
        line_kw=line_kw,
        fill_between_kw=fill_between_kw,
    )
    assert display.lines_[0].get_color() == "red"
    assert_allclose(
        display.fill_between_[0].get_facecolor(),
        [[1.0, 0.0, 0.0, 1.0]],  # RGBA for opaque red (alpha=1.0 from fill_between_kw)
    )
    # `errorbar_kw` must be forwarded to the error-bar container
    std_display_style = "errorbar"
    errorbar_kw = {"color": "red"}
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
        errorbar_kw=errorbar_kw,
    )
    assert display.errorbar_[0].lines[0].get_color() == "red"
@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the x-axis scale induced by a list-typed `param_range`."""
    features, target = data
    disp = ValidationCurveDisplay.from_estimator(
        DecisionTreeClassifier(random_state=0),
        features,
        target,
        param_name="max_depth",
        param_range=param_range,
    )
    assert disp.ax_.get_xscale() == xscale
@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that named constructors return the subclass type, not the base.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    features, target = data
    class DisplayChild(Display):
        # Trivial subclass used only to probe the constructor's return type.
        pass
    result = DisplayChild.from_estimator(
        DecisionTreeClassifier(random_state=0), features, target, **params
    )
    assert isinstance(result, DisplayChild)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,853 @@
from math import ceil
import numpy as np
import pytest
from scipy.stats import expon, norm, randint
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.experimental import enable_halving_search_cv # noqa: F401
from sklearn.model_selection import (
GroupKFold,
GroupShuffleSplit,
HalvingGridSearchCV,
HalvingRandomSearchCV,
KFold,
LeaveOneGroupOut,
LeavePGroupsOut,
ShuffleSplit,
StratifiedKFold,
StratifiedShuffleSplit,
)
from sklearn.model_selection._search_successive_halving import (
_SubsampleMetaSplitter,
_top_k,
)
from sklearn.model_selection.tests.test_search import (
check_cv_results_array_types,
check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC
class FastClassifier(DummyClassifier):
    """Dummy classifier that accepts parameters a, b, ... z.
    These parameter don't affect the predictions and are useful for fast
    grid searching."""
    # update the constraints such that we accept all parameters from a to z
    _parameter_constraints: dict = {
        **DummyClassifier._parameter_constraints,
        **{chr(key): "no_validation" for key in range(ord("a"), ord("z") + 1)},
    }
    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        # The extra keyword arguments (a, ..., z) are accepted but deliberately
        # discarded: they must not influence the predictions.
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )
    def get_params(self, deep=False):
        # Report a value for every a-z parameter so that set_params/clone keep
        # working while a search iterates over those parameters.
        params = super().get_params(deep=deep)
        for char in range(ord("a"), ord("z") + 1):
            params[chr(char)] = "whatever"
        return params
class SometimesFailClassifier(DummyClassifier):
    """Dummy classifier that can be configured to raise during fit or predict.

    Used to check how the halving searches handle failing candidates."""
    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        # `n_estimators` serves as the budgeted resource in the tests; `a` is a
        # free grid parameter with no effect on predictions.
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.n_estimators = n_estimators
        self.a = a
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )
    def fit(self, X, y):
        # Simulate a fitting failure when requested.
        if self.fail_fit:
            raise Exception("fitting failed")
        return super().fit(X, y)
    def predict(self, X):
        # Simulate a scoring-time failure when requested.
        if self.fail_predict:
            raise Exception("predict failed")
        return super().predict(X)
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check the selection of the best scores in presence of failure represented by
    NaN values."""
    n_samples = 1_000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    search = HalvingSearch(
        SometimesFailClassifier(),
        # half of the candidates fail at the `fail_at` stage
        {f"fail_{fail_at}": [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )
    search.fit(X, y)
    # estimators that failed during fit/predict should always rank lower
    # than ones where the fit/predict succeeded
    assert not search.best_params_[f"fail_{fail_at}"]
    scores = search.cv_results_["mean_test_score"]
    ranks = search.cv_results_["rank_test_score"]
    # some scores should be NaN
    assert np.isnan(scores).any()
    unique_nan_ranks = np.unique(ranks[np.isnan(scores)])
    # all NaN scores should have the same rank
    assert unique_nan_ranks.shape[0] == 1
    # NaNs should have the lowest rank
    assert (unique_nan_ranks[0] >= ranks).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "aggressive_elimination,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_required_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_remaining_candidates,"
        "expected_n_candidates,"
        "expected_n_resources,"
    ),
    [
        # notice how it loops at the beginning
        # also, the number of candidates evaluated at the last iteration is
        # <= factor
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # no aggressive elimination: we end up with less iterations, and
        # the number of candidates at the last iter is > factor, which isn't
        # ideal
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # # When the amount of resource isn't limited, aggressive_elimination
        # # has no effect. Here the default min_resources='exhaust' will take
        # # over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    """Check the effect of `aggressive_elimination` on the number of iterations,
    candidates, and resources used by the halving searches."""
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()
    # "limited" forces extra elimination rounds; "unlimited" lets min_resources
    # default behaviour ('exhaust' semantics) dominate
    if max_resources == "limited":
        max_resources = 180
    else:
        max_resources = n_samples
    sh = Est(
        base_estimator,
        param_grid,
        aggressive_elimination=aggressive_elimination,
        max_resources=max_resources,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    # the last iteration keeps ceil(n_candidates / factor) candidates
    assert ceil(sh.n_candidates_[-1] / sh.factor) == sh.n_remaining_candidates_
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "min_resources,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_resources,"
    ),
    [
        # with enough resources
        ("smallest", "auto", 2, 4, [20, 60]),
        # with enough resources but min_resources set manually
        (50, "auto", 2, 3, [50, 150]),
        # without enough resources, only one iteration can be done
        ("smallest", 30, 1, 1, [20]),
        # with exhaust: use as much resources as possible at the last iter
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    """Check the `min_resources` and `max_resources` parameters, and how they
    affect the number of resources used at each iteration."""
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": [1, 2], "b": [1, 2, 3]}
    base_estimator = FastClassifier()
    sh = Est(
        base_estimator,
        param_grid,
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=6)  # same number as with the grid
    sh.fit(X, y)
    expected_n_required_iterations = 2  # given 6 combinations and factor = 3
    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        # 'exhaust' should schedule exactly as many iterations as are possible
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # all resources are used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        (4, 1, 1),  # max_resources == min_resources, only one iteration is
        # possible
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    """Check the number of actual iterations that were run depending on
    `max_resources`."""
    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=1)
    param_grid = {"a": [1, 2], "b": list(range(10))}
    base_estimator = FastClassifier()
    factor = 2
    sh = Est(
        base_estimator,
        param_grid,
        cv=2,
        factor=factor,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=20)  # same as for HalvingGridSearchCV
    sh.fit(X, y)
    # 20 candidates with factor=2 require 5 elimination rounds
    assert sh.n_required_iterations_ == 5
    assert sh.n_iterations_ == n_iterations
    assert sh.n_possible_iterations_ == n_possible_iterations
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    """Check the `resource` parameter: budgeting on an estimator parameter
    instead of the number of samples, plus the related input validation.

    The error-path checks previously always instantiated
    `HalvingGridSearchCV`, ignoring the `Est` parametrization, so
    `HalvingRandomSearchCV`'s validation was never exercised; they now use
    `Est`.
    """
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": [1, 2], "b": list(range(10))}
    base_estimator = FastClassifier()
    sh = Est(base_estimator, param_grid, cv=2, resource="c", max_resources=10, factor=3)
    sh.fit(X, y)
    # with factor=3 and max_resources=10, the resource schedule is 1, 3, 9
    assert set(sh.n_resources_) == set([1, 3, 9])
    # each candidate must be fitted with exactly its allocated amount of 'c'
    for r_i, params, param_c in zip(
        sh.cv_results_["n_resources"],
        sh.cv_results_["params"],
        sh.cv_results_["param_c"],
    ):
        assert r_i == params["c"] == param_c
    # the resource must be an existing parameter of the estimator
    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        sh = Est(
            base_estimator, param_grid, cv=2, resource="1234", max_resources=10
        )
        sh.fit(X, y)
    # the resource cannot also be searched over in the parameter grid
    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        param_grid = {"a": [1, 2], "b": [1, 2], "c": [1, 3]}
        sh = Est(
            base_estimator, param_grid, cv=2, resource="c", max_resources=10
        )
        sh.fit(X, y)
@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # generate exactly as much as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # ask for less than what we could
        (32, 9, 9),  # ask for more than 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    """Check that random search generates the expected number of candidates."""
    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": norm, "b": norm}
    base_estimator = FastClassifier()
    sh = HalvingRandomSearchCV(
        base_estimator,
        param_grid,
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    sh.fit(X, y)
    assert sh.n_candidates_[0] == expected_n_candidates
    if n_candidates == "exhaust":
        # Make sure 'exhaust' makes the last iteration use as much resources as
        # we can
        assert sh.n_resources_[-1] == max_resources
@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # all lists, sample less than n_candidates
        ({"a": randint(1, 3)}, 10),  # not all list, respect n_candidates
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    """Check how many candidates are sampled when asking for more than possible.

    The sampled count depends on whether the distributions are 'all lists' or
    not (see ParameterSampler for details). Somewhat redundant with the
    ParameterSampler checks, but interaction bugs were discovered during the
    development of successive halving.
    """
    X, y = make_classification(n_samples=1024, random_state=0)
    search = HalvingRandomSearchCV(
        FastClassifier(), param_distributions, n_candidates=10
    )
    search.fit(X, y)
    assert search.n_candidates_[0] == expected_n_candidates
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    """Check that invalid parameter combinations raise a ValueError at fit time."""
    base_estimator = FastClassifier()
    param_grid = {"a": [1]}
    X, y = make_classification(100)
    sh = Est(base_estimator, param_grid, **params)
    with pytest.raises(ValueError, match=expected_error_message):
        sh.fit(X, y)
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    """Check error messages specific to HalvingRandomSearchCV."""
    X, y = make_classification(100)
    search = HalvingRandomSearchCV(FastClassifier(), {"a": [1]}, **params)
    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    """Check that splits returned by `_SubsampleMetaSplitter` are of the
    appropriate size."""
    n_samples = 100
    X, y = make_classification(n_samples)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )
    for train, test in cv.split(X, y):
        assert train.shape[0] == expected_train_size
        assert test.shape[0] == expected_test_size
        if subsample_test:
            # both sides were subsampled: together they cover `fraction` of X
            assert train.shape[0] + test.shape[0] == int(n_samples * fraction)
        else:
            # the test fold is the full, untouched KFold test set
            assert test.shape[0] == n_samples // cv.base_cv.get_n_splits()
@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    """Check the consistency of `_SubsampleMetaSplitter` across calls to split."""
    # Make sure _SubsampleMetaSplitter is consistent across calls to split():
    # - we're OK having training sets differ (they're always sampled with a
    # different fraction anyway)
    # - when we don't subsample the test set, we want it to be always the same.
    # This check is the most important. This is ensured by the determinism
    # of the base_cv.
    # Note: we could force both train and test splits to be always the same if
    # we drew an int seed in _SubsampleMetaSplitter.__init__
    n_samples = 100
    X, y = make_classification(n_samples)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )
    folds_a = list(cv.split(X, y, groups=None))
    folds_b = list(cv.split(X, y, groups=None))
    for (train_a, test_a), (train_b, test_b) in zip(folds_a, folds_b):
        # train sets are re-sampled every call, so they should differ
        assert not np.all(train_a == train_b)
        if subsample_test:
            assert not np.all(test_a == test_b)
        else:
            # without subsampling, the test fold is the deterministic KFold one
            assert np.all(test_a == test_b)
            assert np.all(X[test_a] == X[test_b])
@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    """Check that `_top_k` keeps the best candidates of a given iteration."""
    results = {  # this isn't a 'real world' result dict
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    selected = _top_k(results, k=k, itr=itr)
    assert np.all(selected == expected)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_cv_results(Est):
    """Check that `cv_results_` matches the tournament logic: the candidates
    continued in each successive iteration are those that were best in the
    previous iteration."""
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(0)
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()
    # generate random scores: we want to avoid ties, which would otherwise
    # mess with the ordering and make testing harder
    def scorer(est, X, y):
        return rng.rand()
    sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    # non-regression check for
    # https://github.com/scikit-learn/scikit-learn/issues/19203
    assert isinstance(sh.cv_results_["iter"], np.ndarray)
    assert isinstance(sh.cv_results_["n_resources"], np.ndarray)
    cv_results_df = pd.DataFrame(sh.cv_results_)
    # just make sure we don't have ties
    assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)
    cv_results_df["params_str"] = cv_results_df["params"].apply(str)
    # one row per candidate, one column per iteration, cells are scores
    table = cv_results_df.pivot(
        index="params_str", columns="iter", values="mean_test_score"
    )
    # table looks like something like this:
    # iter                    0     1      2       3     4   5
    # params_str
    # {'a': 'l2', 'b': 23}  0.75   NaN    NaN     NaN   NaN NaN
    # {'a': 'l1', 'b': 30}  0.90  0.875   NaN     NaN   NaN NaN
    # {'a': 'l1', 'b': 0}   0.75   NaN    NaN     NaN   NaN NaN
    # {'a': 'l2', 'b': 3}   0.85  0.925  0.9125  0.90625 NaN NaN
    # {'a': 'l1', 'b': 5}   0.80   NaN    NaN     NaN   NaN NaN
    # ...
    # where a NaN indicates that the candidate wasn't evaluated at a given
    # iteration, because it wasn't part of the top-K at some previous
    # iteration. We here make sure that candidates that aren't in the top-k at
    # any given iteration are indeed not evaluated at the subsequent
    # iterations.
    nan_mask = pd.isna(table)
    n_iter = sh.n_iterations_
    for it in range(n_iter - 1):
        already_discarded_mask = nan_mask[it]
        # make sure that if a candidate is already discarded, we don't evaluate
        # it later
        assert (
            already_discarded_mask & nan_mask[it + 1] == already_discarded_mask
        ).all()
        # make sure that the number of discarded candidate is correct
        discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
        kept_mask = ~already_discarded_mask & ~discarded_now_mask
        assert kept_mask.sum() == sh.n_candidates_[it + 1]
        # make sure that all discarded candidates have a lower score than the
        # kept candidates
        discarded_max_score = table[it].where(discarded_now_mask).max()
        kept_min_score = table[it].where(kept_mask).min()
        assert discarded_max_score < kept_min_score
    # We now make sure that the best candidate is chosen only from the last
    # iteration.
    # We also make sure this is true even if there were higher scores in
    # earlier rounds (this isn't generally the case, but worth ensuring it's
    # possible).
    last_iter = cv_results_df["iter"].max()
    idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
        "mean_test_score"
    ].idxmax()
    idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()
    assert sh.best_params_ == cv_results_df.iloc[idx_best_last_iter]["params"]
    assert (
        cv_results_df.iloc[idx_best_last_iter]["mean_test_score"]
        < cv_results_df.iloc[idx_best_all_iters]["mean_test_score"]
    )
    assert (
        cv_results_df.iloc[idx_best_last_iter]["params"]
        != cv_results_df.iloc[idx_best_all_iters]["params"]
    )
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_base_estimator_inputs(Est):
    """Check that the base estimators are passed the correct parameters and
    number of samples at each iteration."""
    pd = pytest.importorskip("pandas")
    passed_n_samples_fit = []
    passed_n_samples_predict = []
    passed_params = []
    class FastClassifierBookKeeping(FastClassifier):
        # Record the sample counts and parameters seen by fit/predict/set_params
        # into the enclosing lists.
        def fit(self, X, y):
            passed_n_samples_fit.append(X.shape[0])
            return super().fit(X, y)
        def predict(self, X):
            passed_n_samples_predict.append(X.shape[0])
            return super().predict(X)
        def set_params(self, **params):
            passed_params.append(params)
            return super().set_params(**params)
    n_samples = 1024
    n_splits = 2
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifierBookKeeping()
    sh = Est(
        base_estimator,
        param_grid,
        factor=2,
        cv=n_splits,
        return_train_score=False,
        refit=False,
    )
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)
    assert len(passed_n_samples_fit) == len(passed_n_samples_predict)
    # total samples seen per (candidate, fold): train part + test part
    passed_n_samples = [
        x + y for (x, y) in zip(passed_n_samples_fit, passed_n_samples_predict)
    ]
    # Lists are of length n_splits * n_iter * n_candidates_at_i.
    # Each chunk of size n_splits corresponds to the n_splits folds for the
    # same candidate at the same iteration, so they contain equal values. We
    # subsample such that the lists are of length n_iter * n_candidates_at_it
    passed_n_samples = passed_n_samples[::n_splits]
    passed_params = passed_params[::n_splits]
    cv_results_df = pd.DataFrame(sh.cv_results_)
    assert len(passed_params) == len(passed_n_samples) == len(cv_results_df)
    uniques, counts = np.unique(passed_n_samples, return_counts=True)
    assert (sh.n_resources_ == uniques).all()
    assert (sh.n_candidates_ == counts).all()
    assert (cv_results_df["params"] == passed_params).all()
    assert (cv_results_df["n_resources"] == passed_n_samples).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_groups_support(Est):
    """Check that a ValueError propagates when `groups` is None for group-based
    CV splitters, and that `groups` is correctly passed to the cv object."""
    rng = np.random.RandomState(0)
    X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 50)
    clf = LinearSVC(random_state=0)
    grid = {"C": [1]}
    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(n_splits=3),
        GroupShuffleSplit(random_state=0),
    ]
    error_msg = "The 'groups' parameter should not be None."
    for cv in group_cvs:
        gs = Est(clf, grid, cv=cv, random_state=0)
        with pytest.raises(ValueError, match=error_msg):
            gs.fit(X, y)
        # with groups provided, fitting should succeed
        gs.fit(X, y, groups=groups)
    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit(random_state=0)]
    for cv in non_group_cvs:
        gs = Est(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)
@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
def test_min_resources_null(SearchCV):
    """Check that fitting on an empty dataset raises a helpful error."""
    # zero rows, three features: min_resources_ resolves to 0
    X = np.empty(0).reshape(0, 3)
    search = SearchCV(FastClassifier(), {"a": [1]}, min_resources="smallest")
    expected_msg = "min_resources_=0: you might have passed an empty dataset X."
    with pytest.raises(ValueError, match=expected_msg):
        search.fit(X, [])
@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
def test_select_best_index(SearchCV):
    """Check the selection strategy of the halving search."""
    results = {  # this isn't a 'real world' result dict
        "iter": np.array([0, 0, 0, 0, 1, 1, 2, 2, 2]),
        "mean_test_score": np.array([4, 3, 5, 1, 11, 10, 5, 6, 9]),
        "params": np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"]),
    }
    # the best candidate of the *last* iteration is 'i', at index 8
    assert SearchCV._select_best_index(None, None, results) == 8
def test_halving_random_search_list_of_dicts():
    """Check the behaviour of the `HalvingRandomSearchCV` with `param_distribution`
    being a list of dictionary.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)
    # two sub-grids: an 'rbf' one (C, gamma) and a 'poly' one (degree)
    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    extra_keys = ("n_resources", "iter")
    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_
    # Check results structure
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)
    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )
    # 'poly' candidates come from the second dict: C and gamma must be masked
    assert all(
        (
            cv_results["param_C"].mask[i]
            and cv_results["param_gamma"].mask[i]
            and not cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "poly"
    )
    # 'rbf' candidates come from the first dict: only degree must be masked
    assert all(
        (
            not cv_results["param_C"].mask[i]
            and not cv_results["param_gamma"].mask[i]
            and cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "rbf"
    )