add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
"""
The :mod:`sklearn._loss` module includes loss function classes suitable for
fitting classification and regression tasks.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from .loss import (
AbsoluteError,
HalfBinomialLoss,
HalfGammaLoss,
HalfMultinomialLoss,
HalfPoissonLoss,
HalfSquaredError,
HalfTweedieLoss,
HalfTweedieLossIdentity,
HuberLoss,
PinballLoss,
)
# Public API of the package: exactly the loss classes imported from `.loss`
# above, listed in the same (alphabetical) order.
__all__ = [
"AbsoluteError",
"HalfBinomialLoss",
"HalfGammaLoss",
"HalfMultinomialLoss",
"HalfPoissonLoss",
"HalfSquaredError",
"HalfTweedieLoss",
"HalfTweedieLossIdentity",
"HuberLoss",
"PinballLoss",
]

View File

@@ -0,0 +1,101 @@
# Fused types for input like y_true, raw_prediction, sample_weights.
# A function using this type is specialized for either double or float input.
ctypedef fused floating_in:
double
float
# Fused types for output like gradient and hessian.
# We use different fused types for input (floating_in) and output (floating_out), such
# that input and output can have different dtypes in the same function call. A single
# fused type can only take on one single value (type) for all arguments in one function
# call.
ctypedef fused floating_out:
double
float
# Struct to return 2 doubles.
# It is the return type of the cy_grad_hess methods declared in this file.
# NOTE(review): presumably val1 is the gradient and val2 the hessian; confirm
# against the implementation.
ctypedef struct double_pair:
double val1
double val2
# C base class for loss functions.
# Declares three scalar C-level methods that each take one (y_true,
# raw_prediction) pair of doubles. All are `noexcept nogil`: they can be called
# without holding the GIL and do not propagate Python exceptions.
cdef class CyLossFunction:
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
# Returns two doubles at once, packed in a double_pair struct.
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfSquaredError: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyHalfSquaredError(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyAbsoluteError: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyAbsoluteError(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyPinballLoss: subclass of CyLossFunction carrying one C double attribute,
# `quantile`, in addition to the three overridden scalar methods.
# NOTE(review): presumably `quantile` is the target quantile level; confirm
# against the implementation.
cdef class CyPinballLoss(CyLossFunction):
cdef readonly double quantile # readonly makes it accessible from Python
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHuberLoss: subclass of CyLossFunction carrying one C double attribute,
# `delta`, in addition to the three overridden scalar methods.
# Unlike the `readonly` attributes of the sibling classes, `delta` is declared
# `public`, so Python code can both read and assign it.
cdef class CyHuberLoss(CyLossFunction):
cdef public double delta # public makes it accessible from Python
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfPoissonLoss: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyHalfPoissonLoss(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfGammaLoss: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyHalfGammaLoss(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfTweedieLoss: subclass of CyLossFunction carrying one C double
# attribute, `power`, in addition to the three overridden scalar methods.
# NOTE(review): presumably `power` is the Tweedie power parameter; confirm
# against the implementation.
cdef class CyHalfTweedieLoss(CyLossFunction):
cdef readonly double power # readonly makes it accessible from Python
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfTweedieLossIdentity: declared with the same layout as
# CyHalfTweedieLoss (a readonly `power` attribute plus the three overridden
# scalar methods), but as a separate subclass of CyLossFunction.
# NOTE(review): the name suggests an identity link instead of a log link;
# confirm against the implementation.
cdef class CyHalfTweedieLossIdentity(CyLossFunction):
cdef readonly double power # readonly makes it accessible from Python
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfBinomialLoss: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyHalfBinomialLoss(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyExponentialLoss: subclass of CyLossFunction that re-declares (overrides)
# the three scalar methods with unchanged signatures. No extra attributes.
cdef class CyExponentialLoss(CyLossFunction):
cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
# CyHalfMultinomialLoss does NOT derive from CyLossFunction and declares only
# cy_gradient, with a different signature from the scalar losses:
#   - y_true and sample_weight are scalars of the fused input type,
#   - raw_prediction is a read-only, C-contiguous 1-D memoryview of that type,
#   - gradient_out is a writable, C-contiguous 1-D memoryview of the fused
#     output type; the method returns void.
# NOTE(review): presumably raw_prediction and gradient_out hold one value per
# class for a single sample, and the result is written into gradient_out;
# confirm against the implementation.
cdef class CyHalfMultinomialLoss():
cdef void cy_gradient(
self,
const floating_in y_true,
const floating_in[::1] raw_prediction,
const floating_in sample_weight,
floating_out[::1] gradient_out,
) noexcept nogil

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,282 @@
"""
Module contains classes for invertible (and differentiable) link functions.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from abc import ABC, abstractmethod
from dataclasses import dataclass
import numpy as np
from scipy.special import expit, logit
from scipy.stats import gmean
from ..utils.extmath import softmax
@dataclass
class Interval:
    """Numeric interval whose two end points are independently open or closed."""

    # Lower and upper end points; low must not exceed high.
    low: float
    high: float
    # Whether the corresponding end point itself belongs to the interval.
    low_inclusive: bool
    high_inclusive: bool

    def __post_init__(self):
        """Reject intervals whose lower bound exceeds the upper bound."""
        if self.low > self.high:
            raise ValueError(
                f"One must have low <= high; got low={self.low}, high={self.high}."
            )

    def includes(self, x):
        """Tell whether every value of x lies inside the interval.

        Parameters
        ----------
        x : ndarray
            Values to check against the interval bounds.

        Returns
        -------
        result : bool
            True only if all elements of x respect both bounds.
        """
        # Pick the comparison matching the open/closed state of the lower bound.
        above_low = np.greater_equal if self.low_inclusive else np.greater
        if not np.all(above_low(x, self.low)):
            return False

        below_high = np.less_equal if self.high_inclusive else np.less
        # np.all yields a numpy.bool_; convert it to a builtin bool.
        return bool(np.all(below_high(x, self.high)))
def _inclusive_low_high(interval, dtype=np.float64):
    """Return two values that lie strictly inside the given interval.

    This is used in tests only.

    Infinite bounds are replaced by -1e10 / 1e10. Finite bounds are moved
    inwards by a small relative and absolute margin derived from the machine
    epsilon of `dtype`.

    Returns
    -------
    low, high : tuple
        The returned values low and high lie within the interval.
    """
    eps = 10 * np.finfo(dtype).eps
    lower, upper = interval.low, interval.high

    if lower == -np.inf:
        low = -1e10
    else:
        # Scaling towards zero (negative bound) or away from it (non-negative
        # bound) both move the value upwards, i.e. into the interval.
        low = (lower * (1 - eps) if lower < 0 else lower * (1 + eps)) + eps

    if upper == np.inf:
        high = 1e10
    else:
        # Mirror image of the lower bound: move the value downwards.
        high = (upper * (1 + eps) if upper < 0 else upper * (1 - eps)) - eps

    return low, high
class BaseLink(ABC):
    """Abstract interface for differentiable, invertible link functions.

    Convention:
        - link function g: raw_prediction = g(y_pred)
        - inverse link h: y_pred = h(raw_prediction)

    For (generalized) linear models, the so called linear predictor is
    `raw_prediction = X @ coef`, and `y_pred = h(raw_prediction)` is the
    predicted expected value of the target `y_true`, conditional on X.

    The methods are ordinary instance methods rather than staticmethods, so
    that a link function can carry parameters if it needs them.
    """

    # used for testing only
    is_multiclass = False

    # Usually, raw_prediction may be any real number and y_pred is an open
    # interval.
    # interval_raw_prediction = Interval(-np.inf, np.inf, False, False)
    interval_y_pred = Interval(-np.inf, np.inf, False, False)

    @abstractmethod
    def link(self, y_pred, out=None):
        """Evaluate the link function g at y_pred.

        The link function takes (predicted) target values to raw predictions,
        i.e. `g(y_pred) = raw_prediction`.

        Parameters
        ----------
        y_pred : array
            Predicted target values.
        out : array
            Optional destination for the result. When given, its shape must be
            one the inputs broadcast to. When omitted or None, a
            freshly-allocated array is returned.

        Returns
        -------
        out : array
            The link function applied element-wise.
        """

    @abstractmethod
    def inverse(self, raw_prediction, out=None):
        """Evaluate the inverse link function h at raw_prediction.

        The inverse link function takes raw predictions to predicted target
        values, i.e. `h(raw_prediction) = y_pred`.

        Parameters
        ----------
        raw_prediction : array
            Raw prediction values (in link space).
        out : array
            Optional destination for the result. When given, its shape must be
            one the inputs broadcast to. When omitted or None, a
            freshly-allocated array is returned.

        Returns
        -------
        out : array
            The inverse link function applied element-wise.
        """
class IdentityLink(BaseLink):
    """The identity link function g(x)=x."""

    def link(self, y_pred, out=None):
        """Return y_pred unchanged, or copy it into `out` when one is given."""
        if out is None:
            # No copy is made: the caller gets back the very same object.
            return y_pred
        np.copyto(out, y_pred)
        return out

    # The identity is its own inverse.
    inverse = link
class LogLink(BaseLink):
    """The log link function g(x)=log(x)."""

    # y_pred is restricted to the open interval (0, inf).
    interval_y_pred = Interval(0, np.inf, False, False)

    def link(self, y_pred, out=None):
        """Return log(y_pred), written into `out` when one is given."""
        raw_prediction = np.log(y_pred, out=out)
        return raw_prediction

    def inverse(self, raw_prediction, out=None):
        """Return exp(raw_prediction), written into `out` when one is given."""
        y_pred = np.exp(raw_prediction, out=out)
        return y_pred
class LogitLink(BaseLink):
    """The logit link function g(x)=logit(x)."""

    # y_pred is restricted to the open interval (0, 1).
    interval_y_pred = Interval(0, 1, False, False)

    def link(self, y_pred, out=None):
        """Return logit(y_pred), written into `out` when one is given."""
        raw_prediction = logit(y_pred, out=out)
        return raw_prediction

    def inverse(self, raw_prediction, out=None):
        """Return expit(raw_prediction), written into `out` when one is given."""
        y_pred = expit(raw_prediction, out=out)
        return y_pred
class HalfLogitLink(BaseLink):
    """Half the logit link function g(x)=1/2 * logit(x).

    Used for the exponential loss.
    """

    # y_pred is restricted to the open interval (0, 1).
    interval_y_pred = Interval(0, 1, False, False)

    def link(self, y_pred, out=None):
        """Return 0.5 * logit(y_pred), written into `out` when one is given."""
        halved = logit(y_pred, out=out)
        # Scale in place so that a caller-supplied `out` holds the final result.
        halved *= 0.5
        return halved

    def inverse(self, raw_prediction, out=None):
        """Return expit(2 * raw_prediction), written into `out` when one is given."""
        doubled = 2 * raw_prediction
        return expit(doubled, out=out)
class MultinomialLogit(BaseLink):
    """The symmetric multinomial logit function.

    Convention:
        - y_pred.shape = raw_prediction.shape = (n_samples, n_classes)

    Notes:
        - The inverse link h is the softmax function.
        - The sum is over the second axis, i.e. axis=1 (n_classes).

    Additional constraints are needed to make

        y_pred[k] = exp(raw_pred[k]) / sum(exp(raw_pred[k]), k=0..n_classes-1)

    identifiable and invertible for n_classes classes. The symmetric side
    constraint is chosen here, where the geometric mean response serves as
    the reference category, see [2].

    For a single data point, the symmetric multinomial logit link function
    is then defined as

        raw_prediction[k] = g(y_pred[k]) = log(y_pred[k]/gmean(y_pred))
        = log(y_pred[k]) - mean(log(y_pred)).

    This is equivalent to the definition in [1] and implies mean centered
    raw predictions:

        sum(raw_prediction[k], k=0..n_classes-1) = 0.

    For linear models with raw_prediction = X @ coef, this corresponds to
    sum(coef[k], k=0..n_classes-1) = 0, i.e. the sum over classes for every
    feature is zero.

    Reference
    ---------
    .. [1] Friedman, Jerome; Hastie, Trevor; Tibshirani, Robert. "Additive
        logistic regression: a statistical view of boosting" Ann. Statist.
        28 (2000), no. 2, 337--407. doi:10.1214/aos/1016218223.
        https://projecteuclid.org/euclid.aos/1016218223

    .. [2] Zahid, Faisal Maqbool and Gerhard Tutz. "Ridge estimation for
        multinomial logit models with symmetric side constraints."
        Computational Statistics 28 (2013): 1017-1034.
        http://epub.ub.uni-muenchen.de/11001/1/tr067.pdf
    """

    is_multiclass = True
    # Each y_pred entry is restricted to the open interval (0, 1).
    interval_y_pred = Interval(0, 1, False, False)

    def symmetrize_raw_prediction(self, raw_prediction):
        """Subtract from each row of raw_prediction its mean over axis=1."""
        row_means = np.mean(raw_prediction, axis=1)
        return raw_prediction - row_means[:, np.newaxis]

    def link(self, y_pred, out=None):
        """Return log(y_pred / gmean(y_pred)), the geometric mean taken per row."""
        # geometric mean as reference category
        reference = gmean(y_pred, axis=1)
        ratio = y_pred / reference[:, np.newaxis]
        return np.log(ratio, out=out)

    def inverse(self, raw_prediction, out=None):
        """Return softmax(raw_prediction), written into `out` when one is given."""
        if out is not None:
            # Copy the input into `out`, then apply softmax in place on it.
            np.copyto(out, raw_prediction)
            softmax(out, copy=False)
            return out
        return softmax(raw_prediction, copy=True)
# Registry mapping a link's string name to its class (not an instance).
_LINKS = {
"identity": IdentityLink,
"log": LogLink,
"logit": LogitLink,
"half_logit": HalfLogitLink,
"multinomial_logit": MultinomialLogit,
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,23 @@
# .pyx is generated, so this is needed to make Cython compilation work
_loss_cython_tree = [
fs.copyfile('_loss.pxd')
]
# Generate _loss.pyx from the template _loss.pyx.tp by running the `tempita`
# command (defined elsewhere in the build).
_loss_pyx = custom_target(
'_loss_pyx',
output: '_loss.pyx',
input: '_loss.pyx.tp',
command: [tempita, '@INPUT@', '-o', '@OUTDIR@'],
# TODO in principle this should go in py.extension_module below. This is a
# temporary workaround for a dependency issue with .pyx.tp files. For more
# details, see https://github.com/mesonbuild/meson/issues/13212
depends: _loss_cython_tree,
)
# Build the `_loss` extension module from the generated _loss.pyx, passed
# through `cython_gen` (defined elsewhere in the build), with the OpenMP
# dependency, and install it under sklearn/_loss.
py.extension_module(
'_loss',
cython_gen.process(_loss_pyx),
dependencies: [openmp_dep],
install: true,
subdir: 'sklearn/_loss',
)

View File

@@ -0,0 +1,111 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn._loss.link import (
_LINKS,
HalfLogitLink,
Interval,
MultinomialLogit,
_inclusive_low_high,
)
# All registered link classes (not instances); used to parametrize the tests.
LINK_FUNCTIONS = list(_LINKS.values())
def test_interval_raises():
    """An Interval with low > high must be rejected with a ValueError."""
    expected_msg = "One must have low <= high; got low=1, high=0."
    with pytest.raises(ValueError, match=expected_msg):
        Interval(1, 0, False, False)
@pytest.mark.parametrize(
    "interval",
    [
        # Every open/closed combination for each of the three bound pairs.
        Interval(low, high, low_inclusive, high_inclusive)
        for low, high in [(0, 1), (-np.inf, np.inf), (-10, -1)]
        for low_inclusive in (False, True)
        for high_inclusive in (False, True)
    ],
)
def test_is_in_range(interval):
    """Check Interval.includes on interior points and on the end points."""
    # make sure low and high are always within the interval, used for linspace
    inner_low, inner_high = _inclusive_low_high(interval)
    grid = np.linspace(inner_low, inner_high, num=10)
    assert interval.includes(grid)

    # Appending an end point is accepted exactly when that end is inclusive.
    with_low = np.r_[grid, interval.low]
    assert interval.includes(with_low) == interval.low_inclusive

    with_high = np.r_[grid, interval.high]
    assert interval.includes(with_high) == interval.high_inclusive

    # Appending both end points requires both ends to be inclusive.
    with_both = np.r_[grid, interval.low, interval.high]
    both_inclusive = interval.low_inclusive and interval.high_inclusive
    assert interval.includes(with_both) == both_inclusive
@pytest.mark.parametrize("link", LINK_FUNCTIONS)
def test_link_inverse_identity(link, global_random_seed):
    """Check that link and inverse undo each other in both directions."""
    rng = np.random.RandomState(global_random_seed)
    link = link()
    n_samples = 100

    # The values for `raw_prediction` are limited from -20 to 20 because in the
    # class `LogitLink` the term `expit(x)` comes very close to 1 for large
    # positive x and therefore loses precision.
    if link.is_multiclass:
        bound, shape = 20, (n_samples, 10)
    elif isinstance(link, HalfLogitLink):
        # Narrower range for the half logit link.
        bound, shape = 10, n_samples
    else:
        bound, shape = 20, n_samples

    raw_prediction = rng.uniform(low=-bound, high=bound, size=shape)
    if link.is_multiclass and isinstance(link, MultinomialLogit):
        raw_prediction = link.symmetrize_raw_prediction(raw_prediction)

    # link(inverse(raw)) == raw
    round_trip_raw = link.link(link.inverse(raw_prediction))
    assert_allclose(round_trip_raw, raw_prediction)

    # inverse(link(y)) == y
    y_pred = link.inverse(raw_prediction)
    round_trip_y = link.inverse(link.link(y_pred))
    assert_allclose(round_trip_y, y_pred)
@pytest.mark.parametrize("link", LINK_FUNCTIONS)
def test_link_out_argument(link):
    """Check that the `out` argument receives the result and is returned."""
    rng = np.random.RandomState(42)
    link = link()
    n_samples = 100

    if not link.is_multiclass:
        # So far, the valid interval of raw_prediction is (-inf, inf) and
        # we do not need to distinguish.
        raw_prediction = rng.uniform(low=-10, high=10, size=n_samples)
    else:
        raw_prediction = rng.normal(loc=0, scale=10, size=(n_samples, 10))
        if isinstance(link, MultinomialLogit):
            raw_prediction = link.symmetrize_raw_prediction(raw_prediction)

    # inverse: the result without `out` must match what lands in `out`.
    y_pred = link.inverse(raw_prediction, out=None)
    buffer = np.empty_like(raw_prediction)
    returned = link.inverse(raw_prediction, out=buffer)
    assert_allclose(y_pred, buffer)
    assert_array_equal(buffer, returned)
    assert np.shares_memory(buffer, returned)

    # link: mapping y_pred back must write raw_prediction into `out`.
    buffer = np.empty_like(y_pred)
    returned = link.link(y_pred, out=buffer)
    assert_allclose(raw_prediction, buffer)
    assert_array_equal(buffer, returned)
    assert np.shares_memory(buffer, returned)

File diff suppressed because it is too large Load Diff