add read me
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Common utilities for testing model selection.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
|
||||
class OneTimeSplitter:
    """A wrapper to make KFold single entry cv iterator"""

    def __init__(self, n_splits=4, n_samples=99):
        # Record the fold parameters for `get_n_splits` and eagerly build the
        # underlying KFold generator; being an iterator, it is exhausted after
        # a single pass through `split`.
        self.n_splits = n_splits
        self.n_samples = n_samples
        self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        """Split can be called only once"""
        # Delegate to the stored iterator; a second call yields nothing.
        yield from self.indices

    def get_n_splits(self, X=None, y=None, groups=None):
        # Number of folds configured at construction time.
        return self.n_splits
|
||||
@@ -0,0 +1,618 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn import config_context
|
||||
from sklearn.base import clone
|
||||
from sklearn.datasets import (
|
||||
load_breast_cancer,
|
||||
load_iris,
|
||||
make_classification,
|
||||
make_multilabel_classification,
|
||||
)
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.ensemble import GradientBoostingClassifier
|
||||
from sklearn.exceptions import NotFittedError
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.metrics import (
|
||||
balanced_accuracy_score,
|
||||
f1_score,
|
||||
fbeta_score,
|
||||
make_scorer,
|
||||
)
|
||||
from sklearn.metrics._scorer import _CurveScorer
|
||||
from sklearn.model_selection import (
|
||||
FixedThresholdClassifier,
|
||||
StratifiedShuffleSplit,
|
||||
TunedThresholdClassifierCV,
|
||||
)
|
||||
from sklearn.model_selection._classification_threshold import (
|
||||
_fit_and_score_over_thresholds,
|
||||
)
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.svm import SVC
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.utils._mocking import CheckingClassifier
|
||||
from sklearn.utils._testing import (
|
||||
_convert_container,
|
||||
assert_allclose,
|
||||
assert_array_equal,
|
||||
)
|
||||
|
||||
|
||||
def test_fit_and_score_over_thresholds_curve_scorers():
    """`_fit_and_score_over_thresholds` must return candidate thresholds sorted
    in ascending order for the accepted curve scorers."""
    X, y = make_classification(n_samples=100, random_state=0)
    # First half of the data fits the model, second half scores it.
    fit_indices = np.arange(50)
    eval_indices = np.arange(50, 100)
    estimator = LogisticRegression()

    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        estimator,
        X,
        y,
        fit_params={},
        train_idx=fit_indices,
        val_idx=eval_indices,
        curve_scorer=scorer,
        score_params={},
    )

    # Thresholds come back sorted and scores are valid balanced accuracies.
    assert np.all(np.diff(thresholds) >= 0)
    assert isinstance(scores, np.ndarray)
    assert np.logical_and(scores >= 0, scores <= 1).all()
|
||||
|
||||
|
||||
def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)

    # A `train_idx` of None signals that the classifier is already fitted.
    train_idx = None
    val_idx = np.arange(50, 100)
    estimator = DecisionTreeClassifier(random_state=0).fit(X, y)
    # The tree memorizes the full training data, so predictions on the
    # validation fold are perfect and the expected scores below hold.
    assert estimator.score(X[val_idx], y[val_idx]) == pytest.approx(1.0)

    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        estimator,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=scorer,
        score_params={},
    )
    assert np.all(np.diff(thresholds) >= 0)
    assert_allclose(scores, [0.5, 1.0])
|
||||
|
||||
|
||||
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample-weight to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # restrict to a binary problem

    # Build a dataset where every sample of class #0 appears twice...
    mask_class_0 = y == 0
    X_repeated = np.vstack([X, X[mask_class_0]])
    y_repeated = np.hstack([y, y[mask_class_0]])
    # ...and the equivalent weighting of the original dataset.
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2

    estimator = LogisticRegression()
    all_repeated = np.arange(X_repeated.shape[0])
    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        estimator,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=all_repeated,
        val_idx=all_repeated,
        curve_scorer=scorer,
        score_params={},
    )

    all_samples = np.arange(X.shape[0])
    scores, thresholds = _fit_and_score_over_thresholds(
        estimator.set_fit_request(sample_weight=True),
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=all_samples,
        val_idx=all_samples,
        curve_scorer=scorer.set_score_request(sample_weight=True),
        score_params={"sample_weight": sample_weight},
    )

    # Weighting must be strictly equivalent to physically repeating samples.
    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {key: _convert_container(y, fit_params_type) for key in ("a", "b")}

    # CheckingClassifier raises if `a`/`b` are not routed to `fit`, so simply
    # reaching the end of the call below is the assertion.
    estimator = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    estimator.set_fit_request(a=True, b=True)

    scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    _fit_and_score_over_thresholds(
        estimator,
        X,
        y,
        fit_params=fit_params,
        train_idx=np.arange(50),
        val_idx=np.arange(50, 100),
        curve_scorer=scorer,
        score_params={},
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for non-binary problem."""
    model = TunedThresholdClassifierCV(LogisticRegression())
    with pytest.raises(ValueError, match="Only binary classification is supported."):
        model.fit(*data)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    model = TunedThresholdClassifierCV(LogisticRegression(), **params)
    with pytest.raises(err_type, match=err_msg):
        model.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that `TunedThresholdClassifierCV` exposes the same response methods as the
    underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)

    model = ThresholdClassifier(estimator=estimator)
    # Availability must mirror the wrapped estimator both before and after fit.
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)

    if hasattr(model, response_method):
        # The wrapper must delegate the call unchanged to the fitted estimator.
        predictions_wrapper = getattr(model, response_method)(X)
        predictions_inner = getattr(model.estimator_, response_method)(X)
        assert_allclose(predictions_wrapper, predictions_inner)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that `TunedThresholdClassifierCV` is optimizing a given objective
    metric."""
    X, y = load_breast_cancer(return_X_y=True)
    X = X[:, :5]  # drop features to degrade the baseline performance

    # Keep only 1/50th of the positive samples so that the problem is heavily
    # imbalanced and the untuned balanced accuracy is low.
    pos = np.flatnonzero(y == 1)
    pos = pos[: pos.size // 50]
    neg = np.flatnonzero(y == 0)
    X = np.vstack([X[neg], X[pos]])
    y = np.hstack([y[neg], y[pos]])

    baseline = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    n_thresholds = 100
    model = TunedThresholdClassifierCV(
        estimator=baseline,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=n_thresholds,
        store_cv_results=True,
    )
    score_optimized = balanced_accuracy_score(y, model.fit(X, y).predict(X))
    score_baseline = balanced_accuracy_score(y, baseline.predict(X))
    # Threshold tuning must beat the default 0.5 cut-off on this metric.
    assert score_optimized > score_baseline
    assert model.cv_results_["thresholds"].shape == (n_thresholds,)
    assert model.cv_results_["scores"].shape == (n_thresholds,)
|
||||
|
||||
|
||||
def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that we can pass a metric with a parameter in addition check that
    `f_beta` with `beta=1` is equivalent to `f1` and different from `f_beta` with
    `beta=2`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    estimator = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

    def tuned(scorer):
        # Fit a fresh tuned model with the requested scorer.
        return TunedThresholdClassifierCV(estimator=estimator, scoring=scorer).fit(X, y)

    model_fbeta_1 = tuned(make_scorer(fbeta_score, beta=1))
    model_fbeta_2 = tuned(make_scorer(fbeta_score, beta=2))
    model_f1 = tuned(make_scorer(f1_score))

    assert model_fbeta_1.best_threshold_ == pytest.approx(model_f1.best_threshold_)
    assert model_fbeta_1.best_threshold_ != pytest.approx(model_fbeta_2.best_threshold_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that targets represented by str are properly managed.
    Also, check with several metrics to be sure that `pos_label` is properly
    dispatched.
    """
    X, y = load_breast_cancer(return_X_y=True)
    # Map numeric targets to strings; "cancer" sorts first alphabetically and
    # is therefore encoded as 0, which exercises the `pos_label` handling.
    classes = np.array(["cancer", "healthy"], dtype=object)
    y = classes[y]
    model = TunedThresholdClassifierCV(
        estimator=make_pipeline(StandardScaler(), LogisticRegression()),
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    ).fit(X, y)
    assert_array_equal(model.classes_, np.sort(classes))
    predictions = model.predict(X)
    assert_array_equal(np.unique(predictions), np.sort(classes))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("with_sample_weight", [True, False])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check the behaviour of the `refit` parameter."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    if with_sample_weight:
        sample_weight = np.abs(rng.randn(X.shape[0]))
    else:
        sample_weight = None

    # With `refit=True`, `estimator_` must be a clone refitted on the full data.
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)

    # With `cv="prefit"` and `refit=False`, the prefit estimator is reused
    # untouched.
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef)

    # With an explicit single-split cv and `refit=False`, `estimator_` is
    # trained on the training fold only.
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [(np.arange(50), np.arange(50, 100))]  # single split
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False).fit(
        X, y, sample_weight=sample_weight
    )
    assert model.estimator_ is not estimator
    train_indices = cv[0][0]
    sw_train = sample_weight[train_indices] if with_sample_weight else None
    estimator.fit(X[train_indices], y[train_indices], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {key: _convert_container(y, fit_params_type) for key in ("a", "b")}

    # CheckingClassifier raises if the expected fit params are not routed, so
    # a clean `fit` call is the assertion.
    estimator = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    estimator.set_fit_request(a=True, b=True)
    TunedThresholdClassifierCV(estimator).fit(X, y, **fit_params)
|
||||
|
||||
|
||||
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that passing removing some sample from the dataset `X` is
    equivalent to passing a `sample_weight` with a factor 0."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue.
    X = StandardScaler().fit_transform(X)
    # Keep two classes and pick samples so that the 2-fold CV split makes the
    # zero-weight samples line up with the dropped ones.
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1

    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)

    model_with_weights.fit(X, y, sample_weight=sample_weight)
    model_without_weights.fit(X[::2], y[::2])

    # Both the fitted coefficients and the probabilistic predictions must
    # coincide between the two formulations.
    assert_allclose(
        model_with_weights.estimator_.coef_, model_without_weights.estimator_.coef_
    )
    assert_allclose(
        model_with_weights.predict_proba(X), model_without_weights.predict_proba(X)
    )
|
||||
|
||||
|
||||
def test_tuned_threshold_classifier_thresholds_array():
    """Check that we can pass an array to `thresholds` and it is used as candidate
    threshold internally."""
    X, y = make_classification(random_state=0)
    candidate_thresholds = np.linspace(0, 1, 11)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(),
        thresholds=candidate_thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    ).fit(X, y)
    # The provided grid must be reported verbatim in the cv results.
    assert_allclose(tuned_model.cv_results_["thresholds"], candidate_thresholds)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that if `cv_results_` exists depending on `store_cv_results`."""
    X, y = make_classification(random_state=0)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(), store_cv_results=store_cv_results
    ).fit(X, y)
    # `cv_results_` is only materialized when explicitly requested.
    assert hasattr(tuned_model, "cv_results_") is store_cv_results
|
||||
|
||||
|
||||
def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float."""
    X, y = make_classification(random_state=0)

    # With `refit=False` and a float `cv`, the inner estimator is fitted on the
    # training side of a single stratified shuffle split; reproduce that split
    # manually and compare the resulting coefficients.
    test_size = 0.3
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)
    tuned_model.fit(X, y)

    splitter = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    train_idx, _ = next(splitter.split(X, y))
    reference = clone(estimator).fit(X[train_idx], y[train_idx])
    assert_allclose(tuned_model.estimator_.coef_, reference.coef_)

    # With `refit=True`, the inner estimator is refitted on the full dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    reference = clone(estimator).fit(X, y)
    assert_allclose(tuned_model.estimator_.coef_, reference.coef_)
|
||||
|
||||
|
||||
def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that we raise a ValueError if the underlying classifier returns constant
    probabilities such that we cannot find any threshold.
    """
    X, y = make_classification(random_state=0)
    constant_clf = DummyClassifier(strategy="constant", constant=1)
    tuned_model = TunedThresholdClassifierCV(
        constant_clf, response_method="predict_proba"
    )
    with pytest.raises(
        ValueError, match="The provided estimator makes constant predictions"
    ):
        tuned_model.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check that `FixedThresholdClassifier` has the same behaviour as the vanilla
    classifier.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    default_threshold_model = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    default_threshold_model.fit(X, y)

    # Recompute the decision by hand with the default cut-off of the chosen
    # response method.
    if response_method in ("auto", "predict_proba"):
        y_score = default_threshold_model.predict_proba(X)[:, 1]
        cutoff = 0.5
    else:  # response_method == "decision_function"
        y_score = default_threshold_model.decision_function(X)
        cutoff = 0.0

    expected = (y_score >= cutoff).astype(int)
    assert_allclose(default_threshold_model.predict(X), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that applying `predict` lead to the same prediction as applying the
    threshold to the output of the response method.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    ).fit(X, y)

    # The wrapped estimator must be a clone fitted to the same solution.
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)

    # Recompute the score of the positive class by hand, honouring `pos_label`.
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        y_score = model.decision_function(X)
        if pos_label == 0:
            y_score = -y_score

    # Translate the boolean decision back into class labels.
    label_of = np.array([0, 1]) if pos_label == 1 else np.array([1, 0])
    expected = label_of[(y_score >= threshold).astype(int)]
    assert_allclose(model.predict(X), expected)

    # Every response method must be delegated unchanged to the inner estimator.
    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        assert_allclose(
            getattr(model, method)(X), getattr(logistic_regression, method)(X)
        )
        assert_allclose(
            getattr(model.estimator_, method)(X),
            getattr(logistic_regression, method)(X),
        )
|
||||
|
||||
|
||||
@config_context(enable_metadata_routing=True)
def test_fixed_threshold_classifier_metadata_routing():
    """Check that everything works with metadata routing."""
    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2

    # Fit a reference model directly with the weights...
    reference = LogisticRegression().set_fit_request(sample_weight=True)
    reference.fit(X, y, sample_weight=sample_weight)

    # ...and check the wrapper routes the same weights to its inner clone.
    wrapped = FixedThresholdClassifier(estimator=clone(reference))
    wrapped.fit(X, y, sample_weight=sample_weight)
    assert_allclose(wrapped.estimator_.coef_, reference.coef_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "method", ["predict_proba", "decision_function", "predict", "predict_log_proba"]
)
def test_fixed_threshold_classifier_fitted_estimator(method):
    """Check that if the underlying estimator is already fitted, no fit is required."""
    X, y = make_classification(random_state=0)
    prefit_classifier = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(estimator=prefit_classifier)
    # Calling the response method without a prior `fit` must not raise.
    getattr(model, method)(X)
|
||||
|
||||
|
||||
def test_fixed_threshold_classifier_classes_():
    """Check that the classes_ attribute is properly set."""
    X, y = make_classification(random_state=0)

    # Accessing `classes_` before the inner estimator is fitted must fail.
    unfitted = FixedThresholdClassifier(estimator=LogisticRegression())
    with pytest.raises(
        AttributeError, match="The underlying estimator is not fitted yet."
    ):
        unfitted.classes_

    fitted = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(estimator=fitted)
    assert_array_equal(model.classes_, fitted.classes_)
|
||||
@@ -0,0 +1,572 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.model_selection import (
|
||||
LearningCurveDisplay,
|
||||
ValidationCurveDisplay,
|
||||
learning_curve,
|
||||
validation_curve,
|
||||
)
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.utils import shuffle
|
||||
from sklearn.utils._testing import assert_allclose, assert_array_equal
|
||||
|
||||
|
||||
@pytest.fixture
def data():
    """Iris features and targets, shuffled deterministically."""
    X, y = load_iris(return_X_y=True)
    return shuffle(X, y, random_state=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
        ({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
    ],
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_parameters_validation(
    pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
):
    """Check that we raise a proper error when passing invalid parameters."""
    X, y = data
    tree = DecisionTreeClassifier(random_state=0)
    with pytest.raises(err_type, match=err_msg):
        CurveDisplay.from_estimator(tree, X, y, **specific_params, **params)
|
||||
|
||||
|
||||
def test_learning_curve_display_default_usage(pyplot, data):
    """Check the default usage of the LearningCurveDisplay class."""
    import matplotlib as mpl

    X, y = data
    tree = DecisionTreeClassifier(random_state=0)

    train_sizes = [0.3, 0.6, 0.9]
    display = LearningCurveDisplay.from_estimator(
        tree, X, y, train_sizes=train_sizes
    )

    # Default rendering uses lines with filled std bands, not error bars.
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    assert all(isinstance(line, mpl.lines.Line2D) for line in display.lines_)
    assert isinstance(display.fill_between_, list)
    for band in display.fill_between_:
        assert isinstance(band, mpl.collections.PolyCollection)
        assert band.get_alpha() == 0.5

    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == "Number of samples in the training set"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]

    # The stored curves must match a direct call to `learning_curve`.
    train_sizes_abs, train_scores, test_scores = learning_curve(
        tree, X, y, train_sizes=train_sizes
    )
    assert_array_equal(display.train_sizes, train_sizes_abs)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
|
||||
|
||||
|
||||
def test_validation_curve_display_default_usage(pyplot, data):
    """Check the default usage of the ValidationCurveDisplay class."""
    import matplotlib as mpl

    X, y = data
    tree = DecisionTreeClassifier(random_state=0)

    param_name, param_range = "max_depth", [1, 3, 5]
    display = ValidationCurveDisplay.from_estimator(
        tree, X, y, param_name=param_name, param_range=param_range
    )

    # Default rendering uses lines with filled std bands, not error bars.
    assert display.errorbar_ is None
    assert isinstance(display.lines_, list)
    assert all(isinstance(line, mpl.lines.Line2D) for line in display.lines_)
    assert isinstance(display.fill_between_, list)
    for band in display.fill_between_:
        assert isinstance(band, mpl.collections.PolyCollection)
        assert band.get_alpha() == 0.5

    assert display.score_name == "Score"
    assert display.ax_.get_xlabel() == f"{param_name}"
    assert display.ax_.get_ylabel() == "Score"
    _, legend_labels = display.ax_.get_legend_handles_labels()
    assert legend_labels == ["Train", "Test"]

    # The stored curves must match a direct call to `validation_curve`.
    train_scores, test_scores = validation_curve(
        tree, X, y, param_name=param_name, param_range=param_range
    )
    assert_array_equal(display.param_range, param_range)
    assert_allclose(display.train_scores, train_scores)
    assert_allclose(display.test_scores, test_scores)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the `negate_score` parameter calling `from_estimator` and
    `plot`.
    """
    X, y = data
    stump = DecisionTreeClassifier(max_depth=1, random_state=0)

    # Without negation: non-negative scores and the default y-label.
    display = CurveDisplay.from_estimator(
        stump, X, y, **specific_params, negate_score=False
    )
    positive_scores = display.lines_[0].get_data()[1]
    assert (positive_scores >= 0).all()
    assert display.ax_.get_ylabel() == "Score"

    # With negation: the same scores flipped and a dedicated y-label.
    display = CurveDisplay.from_estimator(
        stump, X, y, **specific_params, negate_score=True
    )
    negative_scores = display.lines_[0].get_data()[1]
    assert (negative_scores <= 0).all()
    assert_allclose(negative_scores, -positive_scores)
    assert display.ax_.get_ylabel() == "Negative score"

    # Flipping the sign through `plot` negates the data but keeps the label.
    display = CurveDisplay.from_estimator(
        stump, X, y, **specific_params, negate_score=False
    )
    assert display.ax_.get_ylabel() == "Score"
    display.plot(negate_score=True)
    assert display.ax_.get_ylabel() == "Score"
    assert (display.lines_[0].get_data()[1] < 0).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_score_name(
    pyplot, data, score_name, ylabel, CurveDisplay, specific_params
):
    """Check that we can overwrite the default score name shown on the y-axis.

    Both the rendered y-axis label and the `score_name` attribute must reflect
    the requested name (or the default "Score" when `score_name=None`).
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    display = CurveDisplay.from_estimator(
        estimator, X, y, **specific_params, score_name=score_name
    )

    # One fit is enough to check both the rendered label and the stored
    # attribute; the previous version duplicated the (expensive) cross
    # validation with a second estimator for the second assertion.
    assert display.ax_.get_ylabel() == ylabel
    assert display.score_name == ylabel
||||
|
||||
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    # Reference scores computed directly with `learning_curve`; the display
    # must plot the mean of these scores over the CV folds.
    train_sizes = [0.3, 0.6, 0.9]
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )

    # score_type="train": only the train curve is shown.
    score_type = "train"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]

    if std_display_style is None:
        # plotted as a plain line; no errorbar container is created
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        # "errorbar" style: the data lives in the errorbar container instead
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()

    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, train_scores.mean(axis=1))

    # score_type="test": only the test curve is shown.
    score_type = "test"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]

    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()

    assert_array_equal(x_data, train_sizes_abs)
    assert_allclose(y_data, test_scores.mean(axis=1))

    # score_type="both": train curve first, then test curve.
    score_type = "both"
    display = LearningCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        train_sizes=train_sizes,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]

    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()

    assert_array_equal(x_data_train, train_sizes_abs)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, train_sizes_abs)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
||||
|
||||
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
    """Check the behaviour of setting the `score_type` parameter."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    # Reference scores computed directly with `validation_curve`; the display
    # must plot the mean of these scores over the CV folds.
    param_name, param_range = "max_depth", [1, 3, 5]
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )

    # score_type="train": only the train curve is shown.
    score_type = "train"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train"]

    if std_display_style is None:
        # plotted as a plain line; no errorbar container is created
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        # "errorbar" style: the data lives in the errorbar container instead
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()

    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, train_scores.mean(axis=1))

    # score_type="test": only the test curve is shown.
    score_type = "test"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Test"]

    if std_display_style is None:
        assert len(display.lines_) == 1
        assert display.errorbar_ is None
        x_data, y_data = display.lines_[0].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 1
        x_data, y_data = display.errorbar_[0].lines[0].get_data()

    assert_array_equal(x_data, param_range)
    assert_allclose(y_data, test_scores.mean(axis=1))

    # score_type="both": train curve first, then test curve.
    score_type = "both"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
        score_type=score_type,
        std_display_style=std_display_style,
    )

    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]

    if std_display_style is None:
        assert len(display.lines_) == 2
        assert display.errorbar_ is None
        x_data_train, y_data_train = display.lines_[0].get_data()
        x_data_test, y_data_test = display.lines_[1].get_data()
    else:
        assert display.lines_ is None
        assert len(display.errorbar_) == 2
        x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
        x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()

    assert_array_equal(x_data_train, param_range)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, param_range)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
|
||||
@pytest.mark.parametrize(
    "CurveDisplay, specific_params, expected_xscale",
    [
        (
            ValidationCurveDisplay,
            {"param_name": "max_depth", "param_range": np.arange(1, 5)},
            "linear",
        ),
        (LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
        (
            ValidationCurveDisplay,
            {
                "param_name": "max_depth",
                "param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
            },
            "log",
        ),
        (LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
    ],
)
def test_curve_display_xscale_auto(
    pyplot, data, CurveDisplay, specific_params, expected_xscale
):
    """Check the behaviour of the x-axis scaling depending on the data provided."""
    features, target = data
    clf = DecisionTreeClassifier(random_state=0)

    disp = CurveDisplay.from_estimator(clf, features, target, **specific_params)
    # Evenly spaced x values should give a linear axis, geometrically spaced
    # ones a log axis.
    assert disp.ax_.get_xscale() == expected_xscale
||||
|
||||
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the parameter `std_display_style`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    import matplotlib as mpl

    # std_display_style=None: two plain curves (train/test) and no std
    # decoration at all.
    std_display_style = None
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2

    # std_display_style="fill_between": the two curves plus one shaded std
    # band per curve.
    std_display_style = "fill_between"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert len(display.fill_between_) == 2
    for fill_between in display.fill_between_:
        assert isinstance(fill_between, mpl.collections.PolyCollection)
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2

    # std_display_style="errorbar": no plain lines; one errorbar container
    # per curve instead.
    std_display_style = "errorbar"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert display.lines_ is None
    assert len(display.errorbar_) == 2
    for errorbar in display.errorbar_:
        assert isinstance(errorbar, mpl.container.ErrorbarContainer)
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2
||||
|
||||
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the different plotting keyword arguments: `line_kw`,
    `fill_between_kw`, and `errorbar_kw`."""
    X, y = data
    clf = DecisionTreeClassifier(random_state=0)

    # "fill_between" style: `line_kw` and `fill_between_kw` must be forwarded
    # to the underlying matplotlib artists.
    disp = CurveDisplay.from_estimator(
        clf,
        X,
        y,
        **specific_params,
        std_display_style="fill_between",
        line_kw={"color": "red"},
        fill_between_kw={"color": "red", "alpha": 1.0},
    )

    assert disp.lines_[0].get_color() == "red"
    assert_allclose(
        disp.fill_between_[0].get_facecolor(),
        [[1.0, 0.0, 0.0, 1.0]],  # trust me, it's red
    )

    # "errorbar" style: `errorbar_kw` must be forwarded as well.
    disp = CurveDisplay.from_estimator(
        clf,
        X,
        y,
        **specific_params,
        std_display_style="errorbar",
        errorbar_kw={"color": "red"},
    )

    assert disp.errorbar_[0].lines[0].get_color() == "red"
||||
|
||||
@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the induced xscale from the provided param_range values."""
    features, target = data
    clf = DecisionTreeClassifier(random_state=0)

    disp = ValidationCurveDisplay.from_estimator(
        clf,
        features,
        target,
        param_name="max_depth",
        param_range=param_range,
    )

    # The axis scale is inferred from the spacing and sign of the range
    # values (negative values trigger "symlog").
    assert disp.ax_.get_xscale() == xscale
||||
|
||||
@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that named constructors return the correct type when subclassed.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    X, y = data

    class SubclassOfDisplay(Display):
        pass

    # `from_estimator` must instantiate the subclass, not the parent display.
    curve = SubclassOfDisplay.from_estimator(
        DecisionTreeClassifier(random_state=0), X, y, **params
    )
    assert isinstance(curve, SubclassOfDisplay)
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,853 @@
|
||||
from math import ceil
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy.stats import expon, norm, randint
|
||||
|
||||
from sklearn.datasets import make_classification
|
||||
from sklearn.dummy import DummyClassifier
|
||||
from sklearn.experimental import enable_halving_search_cv # noqa: F401
|
||||
from sklearn.model_selection import (
|
||||
GroupKFold,
|
||||
GroupShuffleSplit,
|
||||
HalvingGridSearchCV,
|
||||
HalvingRandomSearchCV,
|
||||
KFold,
|
||||
LeaveOneGroupOut,
|
||||
LeavePGroupsOut,
|
||||
ShuffleSplit,
|
||||
StratifiedKFold,
|
||||
StratifiedShuffleSplit,
|
||||
)
|
||||
from sklearn.model_selection._search_successive_halving import (
|
||||
_SubsampleMetaSplitter,
|
||||
_top_k,
|
||||
)
|
||||
from sklearn.model_selection.tests.test_search import (
|
||||
check_cv_results_array_types,
|
||||
check_cv_results_keys,
|
||||
)
|
||||
from sklearn.svm import SVC, LinearSVC
|
||||
|
||||
|
||||
class FastClassifier(DummyClassifier):
    """Dummy classifier that accepts parameters a, b, ... z.

    These parameters don't affect the predictions and are useful for fast
    grid searching."""

    # update the constraints such that we accept all parameters from a to z
    _parameter_constraints: dict = {
        **DummyClassifier._parameter_constraints,
        **{chr(key): "no_validation" for key in range(ord("a"), ord("z") + 1)},
    }

    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        # The dummy a..z parameters arrive through **kwargs and are
        # intentionally discarded: they never affect the predictions.
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def get_params(self, deep=False):
        # Report a..z in get_params so that parameter-search utilities treat
        # them as valid, settable parameters of this estimator.
        params = super().get_params(deep=deep)
        for char in range(ord("a"), ord("z") + 1):
            params[chr(char)] = "whatever"
        return params
||||
|
||||
class SometimesFailClassifier(DummyClassifier):
    """Dummy classifier that can be configured to raise in fit and/or predict."""

    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        # `fail_fit`/`fail_predict` toggle the simulated failures below;
        # `n_estimators` and `a` are dummy parameters used by the halving
        # tests (e.g. `n_estimators` as the resource in test_nan_handling).
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.n_estimators = n_estimators
        self.a = a

        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def fit(self, X, y):
        # Simulate a fit failure when requested.
        if self.fail_fit:
            raise Exception("fitting failed")
        return super().fit(X, y)

    def predict(self, X):
        # Simulate a predict failure when requested.
        if self.fail_predict:
            raise Exception("predict failed")
        return super().predict(X)
||||
|
||||
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check the selection of the best scores in presence of failure represented by
    NaN values."""
    n_samples = 1_000
    X, y = make_classification(n_samples=n_samples, random_state=0)

    # Half of the candidates are configured to fail in `fit` (or `predict`),
    # which yields NaN test scores for them.
    search = HalvingSearch(
        SometimesFailClassifier(),
        {f"fail_{fail_at}": [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )

    search.fit(X, y)

    # estimators that failed during fit/predict should always rank lower
    # than ones where the fit/predict succeeded
    assert not search.best_params_[f"fail_{fail_at}"]
    scores = search.cv_results_["mean_test_score"]
    ranks = search.cv_results_["rank_test_score"]

    # some scores should be NaN
    assert np.isnan(scores).any()

    unique_nan_ranks = np.unique(ranks[np.isnan(scores)])
    # all NaN scores should have the same rank
    assert unique_nan_ranks.shape[0] == 1
    # NaNs should have the lowest rank
    assert (unique_nan_ranks[0] >= ranks).all()
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "aggressive_elimination,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_required_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_remaining_candidates,"
        "expected_n_candidates,"
        "expected_n_resources,"
    ),
    [
        # notice how it loops at the beginning
        # also, the number of candidates evaluated at the last iteration is
        # <= factor
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # no aggressive elimination: we end up with less iterations, and
        # the number of candidates at the last iter is > factor, which isn't
        # ideal
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # # When the amount of resource isn't limited, aggressive_elimination
        # # has no effect. Here the default min_resources='exhaust' will take
        # # over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    # Test the aggressive_elimination parameter.

    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    # 2 * 30 = 60 candidates in total
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()

    # "limited" caps the budget below n_samples so that not all required
    # iterations fit; "unlimited" lets the search use the full dataset.
    if max_resources == "limited":
        max_resources = 180
    else:
        max_resources = n_samples

    sh = Est(
        base_estimator,
        param_grid,
        aggressive_elimination=aggressive_elimination,
        max_resources=max_resources,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage

    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    # the surviving candidates are the last iteration's candidates reduced by
    # the elimination factor (rounded up)
    assert ceil(sh.n_candidates_[-1] / sh.factor) == sh.n_remaining_candidates_
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "min_resources,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_resources,"
    ),
    [
        # with enough resources
        ("smallest", "auto", 2, 4, [20, 60]),
        # with enough resources but min_resources set manually
        (50, "auto", 2, 3, [50, 150]),
        # without enough resources, only one iteration can be done
        ("smallest", 30, 1, 1, [20]),
        # with exhaust: use as much resources as possible at the last iter
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    # Test the min_resources and max_resources parameters, and how they affect
    # the number of resources used at each iteration
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    # 2 * 3 = 6 candidate combinations
    param_grid = {"a": [1, 2], "b": [1, 2, 3]}
    base_estimator = FastClassifier()

    sh = Est(
        base_estimator,
        param_grid,
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=6)  # same number as with the grid

    sh.fit(X, y)

    expected_n_required_iterations = 2  # given 6 combinations and factor = 3
    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        # with 'exhaust', every possible iteration is actually run
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # all resources are used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        (4, 1, 1),  # max_resources == min_resources, only one iteration is
        # possible
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    # test the number of actual iterations that were run depending on
    # max_resources

    X, y = make_classification(n_samples=1024, random_state=1)

    search = Est(
        FastClassifier(),
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        factor=2,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        # same as for HalvingGridSearchCV
        search.set_params(n_candidates=20)
    search.fit(X, y)

    # 20 candidates with factor=2 always require 5 elimination rounds.
    assert search.n_required_iterations_ == 5
    assert search.n_iterations_ == n_iterations
    assert search.n_possible_iterations_ == n_possible_iterations
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    # Test the resource parameter

    X, y = make_classification(n_samples=1000, random_state=0)
    base_estimator = FastClassifier()
    search = Est(
        base_estimator,
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        resource="c",
        max_resources=10,
        factor=3,
    )
    search.fit(X, y)

    # factor=3 with max_resources=10 yields the budgets 1, 3, 9.
    assert set(search.n_resources_) == {1, 3, 9}
    rows = zip(
        search.cv_results_["n_resources"],
        search.cv_results_["params"],
        search.cv_results_["param_c"],
    )
    # the resource value must be consistently reported in all three columns
    for r_i, params, param_c in rows:
        assert r_i == params["c"] == param_c

    # a resource that is not a parameter of the estimator is rejected
    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": list(range(10))},
            cv=2,
            resource="1234",
            max_resources=10,
        ).fit(X, y)

    # a resource that is also being searched over is rejected
    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        HalvingGridSearchCV(
            base_estimator,
            {"a": [1, 2], "b": [1, 2], "c": [1, 3]},
            cv=2,
            resource="c",
            max_resources=10,
        ).fit(X, y)
||||
|
||||
@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # generate exactly as much as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # ask for less than what we could
        (32, 9, 9),  # ask for more than 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    # Test random search and make sure the number of generated candidates is
    # as expected

    X, y = make_classification(n_samples=1024, random_state=0)
    search = HalvingRandomSearchCV(
        FastClassifier(),
        {"a": norm, "b": norm},
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    search.fit(X, y)

    assert search.n_candidates_[0] == expected_n_candidates
    if n_candidates == "exhaust":
        # 'exhaust' must make the last iteration use as much resources as
        # we can
        assert search.n_resources_[-1] == max_resources
||||
|
||||
@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # all lists, sample less than n_candidates
        ({"a": randint(1, 3)}, 10),  # not all list, respect n_candidates
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    # Make sure random search samples the appropriate number of candidates when
    # we ask for more than what's possible. How many parameters are sampled
    # depends whether the distributions are 'all lists' or not (see
    # ParameterSampler for details). This is somewhat redundant with the checks
    # in ParameterSampler but interaction bugs were discovered during
    # development of SH

    X, y = make_classification(n_samples=1024, random_state=0)
    search = HalvingRandomSearchCV(
        FastClassifier(), param_distributions, n_candidates=10
    )
    search.fit(X, y)
    assert search.n_candidates_[0] == expected_n_candidates
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    # Every invalid configuration must raise a ValueError at fit time with the
    # expected message.
    X, y = make_classification(100)
    search = Est(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
||||
|
||||
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    # tests specific to HalvingRandomSearchCV

    X, y = make_classification(100)
    search = HalvingRandomSearchCV(FastClassifier(), {"a": [1]}, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
||||
|
||||
@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    # Make sure splits returned by SubsampleMetaSplitter are of appropriate
    # size

    n_samples = 100
    X, y = make_classification(n_samples)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )

    for train_idx, test_idx in splitter.split(X, y):
        assert train_idx.shape[0] == expected_train_size
        assert test_idx.shape[0] == expected_test_size
        if subsample_test:
            # both sides are subsampled: together they cover `fraction` of
            # the data
            assert train_idx.shape[0] + test_idx.shape[0] == int(
                n_samples * fraction
            )
        else:
            # the full base-cv test fold is kept
            assert test_idx.shape[0] == n_samples // splitter.base_cv.get_n_splits()
||||
|
||||
@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    # Make sure _SubsampleMetaSplitter is consistent across calls to split():
    # - we're OK having training sets differ (they're always sampled with a
    #   different fraction anyway)
    # - when we don't subsample the test set, we want it to be always the same.
    #   This check is the most important. This is ensured by the determinism
    #   of the base_cv.

    # Note: we could force both train and test splits to be always the same if
    # we drew an int seed in _SubsampleMetaSplitter.__init__

    X, y = make_classification(100)
    splitter = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )

    first_pass = list(splitter.split(X, y, groups=None))
    second_pass = list(splitter.split(X, y, groups=None))

    for (train_a, test_a), (train_b, test_b) in zip(first_pass, second_pass):
        # train indices are re-sampled on every call
        assert not np.all(train_a == train_b)

        if subsample_test:
            assert not np.all(test_a == test_b)
        else:
            assert np.all(test_a == test_b)
            assert np.all(X[test_a] == X[test_b])
||||
|
||||
@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    """_top_k must return the k best candidates of iteration `itr`,
    ordered from worst to best score."""
    fake_results = {  # this isn't a 'real world' result dict
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    selected = _top_k(fake_results, k=k, itr=itr)
    assert np.all(selected == expected)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
|
||||
def test_cv_results(Est):
|
||||
# test that the cv_results_ matches correctly the logic of the
|
||||
# tournament: in particular that the candidates continued in each
|
||||
# successive iteration are those that were best in the previous iteration
|
||||
pd = pytest.importorskip("pandas")
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
n_samples = 1000
|
||||
X, y = make_classification(n_samples=n_samples, random_state=0)
|
||||
param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
|
||||
base_estimator = FastClassifier()
|
||||
|
||||
# generate random scores: we want to avoid ties, which would otherwise
|
||||
# mess with the ordering and make testing harder
|
||||
def scorer(est, X, y):
|
||||
return rng.rand()
|
||||
|
||||
sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
|
||||
if Est is HalvingRandomSearchCV:
|
||||
# same number of candidates as with the grid
|
||||
sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
|
||||
|
||||
sh.fit(X, y)
|
||||
|
||||
# non-regression check for
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/19203
|
||||
assert isinstance(sh.cv_results_["iter"], np.ndarray)
|
||||
assert isinstance(sh.cv_results_["n_resources"], np.ndarray)
|
||||
|
||||
cv_results_df = pd.DataFrame(sh.cv_results_)
|
||||
|
||||
# just make sure we don't have ties
|
||||
assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)
|
||||
|
||||
cv_results_df["params_str"] = cv_results_df["params"].apply(str)
|
||||
table = cv_results_df.pivot(
|
||||
index="params_str", columns="iter", values="mean_test_score"
|
||||
)
|
||||
|
||||
# table looks like something like this:
|
||||
# iter 0 1 2 3 4 5
|
||||
# params_str
|
||||
# {'a': 'l2', 'b': 23} 0.75 NaN NaN NaN NaN NaN
|
||||
# {'a': 'l1', 'b': 30} 0.90 0.875 NaN NaN NaN NaN
|
||||
# {'a': 'l1', 'b': 0} 0.75 NaN NaN NaN NaN NaN
|
||||
# {'a': 'l2', 'b': 3} 0.85 0.925 0.9125 0.90625 NaN NaN
|
||||
# {'a': 'l1', 'b': 5} 0.80 NaN NaN NaN NaN NaN
|
||||
# ...
|
||||
|
||||
# where a NaN indicates that the candidate wasn't evaluated at a given
|
||||
# iteration, because it wasn't part of the top-K at some previous
|
||||
# iteration. We here make sure that candidates that aren't in the top-k at
|
||||
# any given iteration are indeed not evaluated at the subsequent
|
||||
# iterations.
|
||||
nan_mask = pd.isna(table)
|
||||
n_iter = sh.n_iterations_
|
||||
for it in range(n_iter - 1):
|
||||
already_discarded_mask = nan_mask[it]
|
||||
|
||||
# make sure that if a candidate is already discarded, we don't evaluate
|
||||
# it later
|
||||
assert (
|
||||
already_discarded_mask & nan_mask[it + 1] == already_discarded_mask
|
||||
).all()
|
||||
|
||||
# make sure that the number of discarded candidate is correct
|
||||
discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
|
||||
kept_mask = ~already_discarded_mask & ~discarded_now_mask
|
||||
assert kept_mask.sum() == sh.n_candidates_[it + 1]
|
||||
|
||||
# make sure that all discarded candidates have a lower score than the
|
||||
# kept candidates
|
||||
discarded_max_score = table[it].where(discarded_now_mask).max()
|
||||
kept_min_score = table[it].where(kept_mask).min()
|
||||
assert discarded_max_score < kept_min_score
|
||||
|
||||
# We now make sure that the best candidate is chosen only from the last
|
||||
# iteration.
|
||||
# We also make sure this is true even if there were higher scores in
|
||||
# earlier rounds (this isn't generally the case, but worth ensuring it's
|
||||
# possible).
|
||||
|
||||
last_iter = cv_results_df["iter"].max()
|
||||
idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
|
||||
"mean_test_score"
|
||||
].idxmax()
|
||||
idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()
|
||||
|
||||
assert sh.best_params_ == cv_results_df.iloc[idx_best_last_iter]["params"]
|
||||
assert (
|
||||
cv_results_df.iloc[idx_best_last_iter]["mean_test_score"]
|
||||
< cv_results_df.iloc[idx_best_all_iters]["mean_test_score"]
|
||||
)
|
||||
assert (
|
||||
cv_results_df.iloc[idx_best_last_iter]["params"]
|
||||
!= cv_results_df.iloc[idx_best_all_iters]["params"]
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
|
||||
def test_base_estimator_inputs(Est):
|
||||
# make sure that the base estimators are passed the correct parameters and
|
||||
# number of samples at each iteration.
|
||||
pd = pytest.importorskip("pandas")
|
||||
|
||||
passed_n_samples_fit = []
|
||||
passed_n_samples_predict = []
|
||||
passed_params = []
|
||||
|
||||
class FastClassifierBookKeeping(FastClassifier):
|
||||
def fit(self, X, y):
|
||||
passed_n_samples_fit.append(X.shape[0])
|
||||
return super().fit(X, y)
|
||||
|
||||
def predict(self, X):
|
||||
passed_n_samples_predict.append(X.shape[0])
|
||||
return super().predict(X)
|
||||
|
||||
def set_params(self, **params):
|
||||
passed_params.append(params)
|
||||
return super().set_params(**params)
|
||||
|
||||
n_samples = 1024
|
||||
n_splits = 2
|
||||
X, y = make_classification(n_samples=n_samples, random_state=0)
|
||||
param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
|
||||
base_estimator = FastClassifierBookKeeping()
|
||||
|
||||
sh = Est(
|
||||
base_estimator,
|
||||
param_grid,
|
||||
factor=2,
|
||||
cv=n_splits,
|
||||
return_train_score=False,
|
||||
refit=False,
|
||||
)
|
||||
if Est is HalvingRandomSearchCV:
|
||||
# same number of candidates as with the grid
|
||||
sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
|
||||
|
||||
sh.fit(X, y)
|
||||
|
||||
assert len(passed_n_samples_fit) == len(passed_n_samples_predict)
|
||||
passed_n_samples = [
|
||||
x + y for (x, y) in zip(passed_n_samples_fit, passed_n_samples_predict)
|
||||
]
|
||||
|
||||
# Lists are of length n_splits * n_iter * n_candidates_at_i.
|
||||
# Each chunk of size n_splits corresponds to the n_splits folds for the
|
||||
# same candidate at the same iteration, so they contain equal values. We
|
||||
# subsample such that the lists are of length n_iter * n_candidates_at_it
|
||||
passed_n_samples = passed_n_samples[::n_splits]
|
||||
passed_params = passed_params[::n_splits]
|
||||
|
||||
cv_results_df = pd.DataFrame(sh.cv_results_)
|
||||
|
||||
assert len(passed_params) == len(passed_n_samples) == len(cv_results_df)
|
||||
|
||||
uniques, counts = np.unique(passed_n_samples, return_counts=True)
|
||||
assert (sh.n_resources_ == uniques).all()
|
||||
assert (sh.n_candidates_ == counts).all()
|
||||
|
||||
assert (cv_results_df["params"] == passed_params).all()
|
||||
assert (cv_results_df["n_resources"] == passed_n_samples).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
|
||||
def test_groups_support(Est):
|
||||
# Check if ValueError (when groups is None) propagates to
|
||||
# HalvingGridSearchCV and HalvingRandomSearchCV
|
||||
# And also check if groups is correctly passed to the cv object
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
|
||||
groups = rng.randint(0, 3, 50)
|
||||
|
||||
clf = LinearSVC(random_state=0)
|
||||
grid = {"C": [1]}
|
||||
|
||||
group_cvs = [
|
||||
LeaveOneGroupOut(),
|
||||
LeavePGroupsOut(2),
|
||||
GroupKFold(n_splits=3),
|
||||
GroupShuffleSplit(random_state=0),
|
||||
]
|
||||
error_msg = "The 'groups' parameter should not be None."
|
||||
for cv in group_cvs:
|
||||
gs = Est(clf, grid, cv=cv, random_state=0)
|
||||
with pytest.raises(ValueError, match=error_msg):
|
||||
gs.fit(X, y)
|
||||
gs.fit(X, y, groups=groups)
|
||||
|
||||
non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit(random_state=0)]
|
||||
for cv in non_group_cvs:
|
||||
gs = Est(clf, grid, cv=cv)
|
||||
# Should not raise an error
|
||||
gs.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
|
||||
def test_min_resources_null(SearchCV):
|
||||
"""Check that we raise an error if the minimum resources is set to 0."""
|
||||
base_estimator = FastClassifier()
|
||||
param_grid = {"a": [1]}
|
||||
X = np.empty(0).reshape(0, 3)
|
||||
|
||||
search = SearchCV(base_estimator, param_grid, min_resources="smallest")
|
||||
|
||||
err_msg = "min_resources_=0: you might have passed an empty dataset X."
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
search.fit(X, [])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
|
||||
def test_select_best_index(SearchCV):
|
||||
"""Check the selection strategy of the halving search."""
|
||||
results = { # this isn't a 'real world' result dict
|
||||
"iter": np.array([0, 0, 0, 0, 1, 1, 2, 2, 2]),
|
||||
"mean_test_score": np.array([4, 3, 5, 1, 11, 10, 5, 6, 9]),
|
||||
"params": np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"]),
|
||||
}
|
||||
|
||||
# we expect the index of 'i'
|
||||
best_index = SearchCV._select_best_index(None, None, results)
|
||||
assert best_index == 8
|
||||
|
||||
|
||||
def test_halving_random_search_list_of_dicts():
    """Check the behaviour of the `HalvingRandomSearchCV` with `param_distribution`
    being a list of dictionary.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)

    # two disjoint sub-spaces: rbf uses C/gamma, poly uses degree
    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    extra_keys = ("n_resources", "iter")

    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_

    # results must expose exactly the expected keys with the expected dtypes
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)
    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )

    # each candidate belongs to exactly one sub-space: parameters from the
    # other sub-space must be masked out in the cv_results_ arrays
    for idx in range(n_candidates):
        kernel = cv_results["param_kernel"][idx]
        if kernel == "poly":
            assert cv_results["param_C"].mask[idx]
            assert cv_results["param_gamma"].mask[idx]
            assert not cv_results["param_degree"].mask[idx]
        elif kernel == "rbf":
            assert not cv_results["param_C"].mask[idx]
            assert not cv_results["param_gamma"].mask[idx]
            assert cv_results["param_degree"].mask[idx]
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user