add read me
This commit is contained in:
@@ -0,0 +1,9 @@
|
||||
"""Mixture modeling algorithms."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from ._bayesian_mixture import BayesianGaussianMixture
|
||||
from ._gaussian_mixture import GaussianMixture
|
||||
|
||||
__all__ = ["BayesianGaussianMixture", "GaussianMixture"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
571
venv/lib/python3.12/site-packages/sklearn/mixture/_base.py
Normal file
571
venv/lib/python3.12/site-packages/sklearn/mixture/_base.py
Normal file
@@ -0,0 +1,571 @@
|
||||
"""Base class for mixture models."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import warnings
|
||||
from abc import ABCMeta, abstractmethod
|
||||
from numbers import Integral, Real
|
||||
from time import time
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import logsumexp
|
||||
|
||||
from .. import cluster
|
||||
from ..base import BaseEstimator, DensityMixin, _fit_context
|
||||
from ..cluster import kmeans_plusplus
|
||||
from ..exceptions import ConvergenceWarning
|
||||
from ..utils import check_random_state
|
||||
from ..utils._param_validation import Interval, StrOptions
|
||||
from ..utils.validation import check_is_fitted, validate_data
|
||||
|
||||
|
||||
def _check_shape(param, param_shape, name):
    """Validate the shape of the input parameter 'param'.

    Parameters
    ----------
    param : array
        Value whose shape is checked. It is converted with ``np.asarray``
        only to read its shape; it is never modified.

    param_shape : tuple
        Expected shape. Any sequence of ints is accepted and normalised to a
        tuple, so that e.g. ``[2, 3]`` compares equal to an array shape of
        ``(2, 3)``.

    name : str
        Name of the parameter, used in the error message.

    Raises
    ------
    ValueError
        If the shape of ``param`` differs from ``param_shape``.
    """
    # ``ndarray.shape`` is always a tuple; normalise the expected shape so a
    # list (or other sequence) is not rejected purely because of its type.
    expected_shape = tuple(param_shape)
    # ``asarray`` avoids copying when ``param`` is already an ndarray.
    actual_shape = np.asarray(param).shape
    if actual_shape != expected_shape:
        raise ValueError(
            "The parameter '%s' should have the shape of %s, but got %s"
            % (name, expected_shape, actual_shape)
        )
|
||||
|
||||
|
||||
class BaseMixture(DensityMixin, BaseEstimator, metaclass=ABCMeta):
    """Base class for mixture models.

    This abstract class specifies an interface for all mixture classes and
    provides basic common methods for mixture models.
    """

    # Constraints shared by every mixture estimator; subclasses extend this
    # mapping with their own parameters.
    _parameter_constraints: dict = {
        "n_components": [Interval(Integral, 1, None, closed="left")],
        "tol": [Interval(Real, 0.0, None, closed="left")],
        "reg_covar": [Interval(Real, 0.0, None, closed="left")],
        "max_iter": [Interval(Integral, 0, None, closed="left")],
        "n_init": [Interval(Integral, 1, None, closed="left")],
        "init_params": [
            StrOptions({"kmeans", "random", "random_from_data", "k-means++"})
        ],
        "random_state": ["random_state"],
        "warm_start": ["boolean"],
        "verbose": ["verbose"],
        "verbose_interval": [Interval(Integral, 1, None, closed="left")],
    }

    def __init__(
        self,
        n_components,
        tol,
        reg_covar,
        max_iter,
        n_init,
        init_params,
        random_state,
        warm_start,
        verbose,
        verbose_interval,
    ):
        """Store the hyper-parameters unchanged on the instance."""
        self.n_components = n_components
        self.tol = tol
        self.reg_covar = reg_covar
        self.max_iter = max_iter
        self.n_init = n_init
        self.init_params = init_params
        self.random_state = random_state
        self.warm_start = warm_start
        self.verbose = verbose
        self.verbose_interval = verbose_interval

    @abstractmethod
    def _check_parameters(self, X):
        """Check initial parameters of the derived class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        """
        pass

    def _initialize_parameters(self, X, random_state):
        """Initialize the model parameters.

        Builds an initial responsibility matrix according to
        ``self.init_params`` and hands it to ``_initialize``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        random_state : RandomState
            A random number generator instance that controls the random seed
            used for the method chosen to initialize the parameters.
        """
        n_samples, _ = X.shape

        if self.init_params == "kmeans":
            # One-hot responsibilities from a single k-means run.
            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
            label = (
                cluster.KMeans(
                    n_clusters=self.n_components, n_init=1, random_state=random_state
                )
                .fit(X)
                .labels_
            )
            resp[np.arange(n_samples), label] = 1
        elif self.init_params == "random":
            # Uniform random responsibilities, normalised so each row sums
            # to one.
            resp = np.asarray(
                random_state.uniform(size=(n_samples, self.n_components)), dtype=X.dtype
            )
            resp /= resp.sum(axis=1)[:, np.newaxis]
        elif self.init_params == "random_from_data":
            # Each component is seeded by one distinct, randomly drawn sample.
            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
            indices = random_state.choice(
                n_samples, size=self.n_components, replace=False
            )
            resp[indices, np.arange(self.n_components)] = 1
        elif self.init_params == "k-means++":
            # Each component is seeded by one sample chosen by k-means++.
            resp = np.zeros((n_samples, self.n_components), dtype=X.dtype)
            _, indices = kmeans_plusplus(
                X,
                self.n_components,
                random_state=random_state,
            )
            resp[indices, np.arange(self.n_components)] = 1

        self._initialize(X, resp)

    @abstractmethod
    def _initialize(self, X, resp):
        """Initialize the model parameters of the derived class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        resp : array-like of shape (n_samples, n_components)
        """
        pass

    def fit(self, X, y=None):
        """Estimate model parameters with the EM algorithm.

        The method fits the model ``n_init`` times and sets the parameters with
        which the model has the largest likelihood or lower bound. Within each
        trial, the method iterates between E-step and M-step for ``max_iter``
        times until the change of likelihood or lower bound is less than
        ``tol``, otherwise, a ``ConvergenceWarning`` is raised.
        If ``warm_start`` is ``True``, then ``n_init`` is ignored and a single
        initialization is performed upon the first call. Upon consecutive
        calls, training starts where it left off.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            The fitted mixture.
        """
        # parameters are validated in fit_predict
        self.fit_predict(X, y)
        return self

    @_fit_context(prefer_skip_nested_validation=True)
    def fit_predict(self, X, y=None):
        """Estimate model parameters using X and predict the labels for X.

        The method fits the model n_init times and sets the parameters with
        which the model has the largest likelihood or lower bound. Within each
        trial, the method iterates between E-step and M-step for `max_iter`
        times until the change of likelihood or lower bound is less than
        `tol`, otherwise, a :class:`~sklearn.exceptions.ConvergenceWarning` is
        raised. After fitting, it predicts the most probable label for the
        input data points.

        .. versionadded:: 0.20

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        labels : array, shape (n_samples,)
            Component labels.

        Raises
        ------
        ValueError
            If ``X`` has fewer samples than ``n_components``.
        """
        X = validate_data(self, X, dtype=[np.float64, np.float32], ensure_min_samples=2)
        if X.shape[0] < self.n_components:
            raise ValueError(
                "Expected n_samples >= n_components "
                f"but got n_components = {self.n_components}, "
                f"n_samples = {X.shape[0]}"
            )
        self._check_parameters(X)

        # if we enable warm_start, we will have a unique initialisation
        do_init = not (self.warm_start and hasattr(self, "converged_"))
        n_init = self.n_init if do_init else 1

        max_lower_bound = -np.inf
        best_lower_bounds = []
        self.converged_ = False

        random_state = check_random_state(self.random_state)

        for init in range(n_init):
            self._print_verbose_msg_init_beg(init)

            if do_init:
                self._initialize_parameters(X, random_state)

            # On a warm start, continue from the previously reached bound.
            lower_bound = -np.inf if do_init else self.lower_bound_
            current_lower_bounds = []

            if self.max_iter == 0:
                # No EM iteration requested: keep the initial parameters.
                best_params = self._get_parameters()
                best_n_iter = 0
            else:
                converged = False
                for n_iter in range(1, self.max_iter + 1):
                    prev_lower_bound = lower_bound

                    log_prob_norm, log_resp = self._e_step(X)
                    self._m_step(X, log_resp)
                    lower_bound = self._compute_lower_bound(log_resp, log_prob_norm)
                    current_lower_bounds.append(lower_bound)

                    change = lower_bound - prev_lower_bound
                    self._print_verbose_msg_iter_end(n_iter, change)

                    if abs(change) < self.tol:
                        converged = True
                        break

                self._print_verbose_msg_init_end(lower_bound, converged)

                # The ``== -np.inf`` test guarantees that the first trial is
                # always recorded, even if its bound does not compare greater.
                if lower_bound > max_lower_bound or max_lower_bound == -np.inf:
                    max_lower_bound = lower_bound
                    best_params = self._get_parameters()
                    best_n_iter = n_iter
                    best_lower_bounds = current_lower_bounds
                    self.converged_ = converged

        # Should only warn about convergence if max_iter > 0, otherwise
        # the user is assumed to have used 0-iters initialization
        # to get the initial means.
        if not self.converged_ and self.max_iter > 0:
            warnings.warn(
                (
                    "Best performing initialization did not converge. "
                    "Try different init parameters, or increase max_iter, "
                    "tol, or check for degenerate data."
                ),
                ConvergenceWarning,
            )

        self._set_parameters(best_params)
        self.n_iter_ = best_n_iter
        self.lower_bound_ = max_lower_bound
        self.lower_bounds_ = best_lower_bounds

        # Always do a final e-step to guarantee that the labels returned by
        # fit_predict(X) are always consistent with fit(X).predict(X)
        # for any value of max_iter and tol (and any random_state).
        _, log_resp = self._e_step(X)

        return log_resp.argmax(axis=1)

    def _e_step(self, X):
        """E step.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        log_prob_norm : float
            Mean of the logarithms of the probabilities of each sample in X

        log_responsibility : array, shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        log_prob_norm, log_resp = self._estimate_log_prob_resp(X)
        return np.mean(log_prob_norm), log_resp

    @abstractmethod
    def _m_step(self, X, log_resp):
        """M step.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        log_resp : array-like of shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities) of
            the point of each sample in X.
        """
        pass

    @abstractmethod
    def _get_parameters(self):
        """Return the current model parameters.

        ``fit_predict`` stores the returned object for the best trial and
        later passes it back to ``_set_parameters``.
        """
        pass

    @abstractmethod
    def _set_parameters(self, params):
        """Restore model parameters from an object made by ``_get_parameters``.

        Parameters
        ----------
        params : object
            Value previously returned by ``_get_parameters``.
        """
        pass

    def score_samples(self, X):
        """Compute the log-likelihood of each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        log_prob : array, shape (n_samples,)
            Log-likelihood of each sample in `X` under the current model.
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)

        return logsumexp(self._estimate_weighted_log_prob(X), axis=1)

    def score(self, X, y=None):
        """Compute the per-sample average log-likelihood of the given data X.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        log_likelihood : float
            Mean over the samples of the log-likelihood returned by
            `score_samples`.
        """
        return self.score_samples(X).mean()

    def predict(self, X):
        """Predict the labels for the data samples in X using trained model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        labels : array, shape (n_samples,)
            Component labels.
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        return self._estimate_weighted_log_prob(X).argmax(axis=1)

    def predict_proba(self, X):
        """Evaluate the posterior probability of each component per sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            List of n_features-dimensional data points. Each row
            corresponds to a single data point.

        Returns
        -------
        resp : array, shape (n_samples, n_components)
            Responsibility of each component for each sample in X, i.e. the
            exponential of the normalised weighted log-probabilities.
        """
        check_is_fitted(self)
        X = validate_data(self, X, reset=False)
        _, log_resp = self._estimate_log_prob_resp(X)
        return np.exp(log_resp)

    def sample(self, n_samples=1):
        """Generate random samples from the fitted Gaussian distribution.

        Reads the fitted attributes ``means_``, ``weights_``,
        ``covariances_`` and ``covariance_type``, which are expected to be
        provided by the derived class.

        Parameters
        ----------
        n_samples : int, default=1
            Number of samples to generate.

        Returns
        -------
        X : array, shape (n_samples, n_features)
            Randomly generated sample.

        y : array, shape (n_samples,)
            Component labels.

        Raises
        ------
        ValueError
            If ``n_samples`` is smaller than 1.
        """
        check_is_fitted(self)

        if n_samples < 1:
            # Report the offending value itself (not ``n_components``).
            raise ValueError(
                "Invalid value for 'n_samples': %d . The sampling requires at "
                "least one sample." % (n_samples)
            )

        _, n_features = self.means_.shape
        rng = check_random_state(self.random_state)
        # Number of samples to draw from each component.
        n_samples_comp = rng.multinomial(n_samples, self.weights_)

        if self.covariance_type == "full":
            X = np.vstack(
                [
                    rng.multivariate_normal(mean, covariance, int(sample))
                    for (mean, covariance, sample) in zip(
                        self.means_, self.covariances_, n_samples_comp
                    )
                ]
            )
        elif self.covariance_type == "tied":
            # A single covariance matrix is shared by all components.
            X = np.vstack(
                [
                    rng.multivariate_normal(mean, self.covariances_, int(sample))
                    for (mean, sample) in zip(self.means_, n_samples_comp)
                ]
            )
        else:
            # Remaining covariance types: scale standard normal draws by the
            # element-wise square root of the per-component covariance.
            X = np.vstack(
                [
                    mean
                    + rng.standard_normal(size=(sample, n_features))
                    * np.sqrt(covariance)
                    for (mean, covariance, sample) in zip(
                        self.means_, self.covariances_, n_samples_comp
                    )
                ]
            )

        y = np.concatenate(
            [np.full(sample, j, dtype=int) for j, sample in enumerate(n_samples_comp)]
        )

        return (X, y)

    def _estimate_weighted_log_prob(self, X):
        """Estimate the weighted log-probabilities, log P(X | Z) + log weights.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        weighted_log_prob : array, shape (n_samples, n_components)
        """
        return self._estimate_log_prob(X) + self._estimate_log_weights()

    @abstractmethod
    def _estimate_log_weights(self):
        """Estimate log-weights in EM algorithm, E[ log pi ] in VB algorithm.

        Returns
        -------
        log_weight : array, shape (n_components, )
        """
        pass

    @abstractmethod
    def _estimate_log_prob(self, X):
        """Estimate the log-probabilities log P(X | Z).

        Compute the log-probabilities per each component for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        log_prob : array, shape (n_samples, n_components)
        """
        pass

    def _estimate_log_prob_resp(self, X):
        """Estimate log probabilities and responsibilities for each sample.

        Compute the log probabilities, weighted log probabilities per
        component and responsibilities for each sample in X with respect to
        the current state of the model.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        log_prob_norm : array, shape (n_samples,)
            log p(X)

        log_responsibilities : array, shape (n_samples, n_components)
            logarithm of the responsibilities
        """
        weighted_log_prob = self._estimate_weighted_log_prob(X)
        log_prob_norm = logsumexp(weighted_log_prob, axis=1)
        with np.errstate(under="ignore"):
            # ignore underflow
            log_resp = weighted_log_prob - log_prob_norm[:, np.newaxis]
        return log_prob_norm, log_resp

    def _print_verbose_msg_init_beg(self, n_init):
        """Print verbose message at the beginning of an initialization.

        With ``verbose >= 2`` the timers used by the other verbose helpers
        are started as well.
        """
        if self.verbose == 1:
            print("Initialization %d" % n_init)
        elif self.verbose >= 2:
            print("Initialization %d" % n_init)
            self._init_prev_time = time()
            self._iter_prev_time = self._init_prev_time

    def _print_verbose_msg_iter_end(self, n_iter, diff_ll):
        """Print verbose message at the end of an iteration.

        Nothing is printed unless ``n_iter`` is a multiple of
        ``verbose_interval``.
        """
        if n_iter % self.verbose_interval == 0:
            if self.verbose == 1:
                print(" Iteration %d" % n_iter)
            elif self.verbose >= 2:
                cur_time = time()
                print(
                    " Iteration %d\t time lapse %.5fs\t ll change %.5f"
                    % (n_iter, cur_time - self._iter_prev_time, diff_ll)
                )
                self._iter_prev_time = cur_time

    def _print_verbose_msg_init_end(self, lb, init_has_converged):
        """Print verbose message at the end of an initialization."""
        converged_msg = "converged" if init_has_converged else "did not converge"
        if self.verbose == 1:
            print(f"Initialization {converged_msg}.")
        elif self.verbose >= 2:
            t = time() - self._init_prev_time
            print(
                f"Initialization {converged_msg}. time lapse {t:.5f}s\t lower bound"
                f" {lb:.5f}."
            )
|
||||
@@ -0,0 +1,891 @@
|
||||
"""Bayesian Gaussian Mixture Model."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
from numbers import Real
|
||||
|
||||
import numpy as np
|
||||
from scipy.special import betaln, digamma, gammaln
|
||||
|
||||
from ..utils import check_array
|
||||
from ..utils._param_validation import Interval, StrOptions
|
||||
from ._base import BaseMixture, _check_shape
|
||||
from ._gaussian_mixture import (
|
||||
_check_precision_matrix,
|
||||
_check_precision_positivity,
|
||||
_compute_log_det_cholesky,
|
||||
_compute_precision_cholesky,
|
||||
_estimate_gaussian_parameters,
|
||||
_estimate_log_gaussian_prob,
|
||||
)
|
||||
|
||||
|
||||
def _log_dirichlet_norm(dirichlet_concentration):
    """Compute the log of the Dirichlet distribution normalization term.

    The value is ``gammaln(sum(c)) - sum(gammaln(c))`` for the concentration
    vector ``c``.

    Parameters
    ----------
    dirichlet_concentration : array-like
        The concentration parameters of the Dirichlet distribution, one
        entry per dimension of the distribution.

    Returns
    -------
    log_dirichlet_norm : float
        The log normalization of the Dirichlet distribution.
    """
    concentration_total = np.sum(dirichlet_concentration)
    log_gamma_of_parts = gammaln(dirichlet_concentration)
    return gammaln(concentration_total) - np.sum(log_gamma_of_parts)
|
||||
|
||||
|
||||
def _log_wishart_norm(degrees_of_freedom, log_det_precisions_chol, n_features):
    """Compute the log of the Wishart distribution normalization term.

    Parameters
    ----------
    degrees_of_freedom : array-like of shape (n_components,)
        The number of degrees of freedom on the covariance Wishart
        distributions.

    log_det_precisions_chol : array-like of shape (n_components,)
        Log-determinant term of the precision Cholesky factor for each
        component; it is multiplied by ``degrees_of_freedom``.

    n_features : int
        The number of features.

    Returns
    -------
    log_wishart_norm : array-like of shape (n_components,)
        The log normalization of the Wishart distribution.
    """
    # To simplify the computation we have removed the np.log(np.pi) term
    # One row per feature index, so the subtraction below broadcasts to a
    # (n_features, n_components) grid that is then summed over features.
    feature_offsets = np.arange(n_features)[:, np.newaxis]
    log_gamma_sum = np.sum(
        gammaln(0.5 * (degrees_of_freedom - feature_offsets)),
        0,
    )
    precision_term = degrees_of_freedom * log_det_precisions_chol
    scale_term = degrees_of_freedom * n_features * 0.5 * math.log(2.0)
    return -(precision_term + scale_term + log_gamma_sum)
|
||||
|
||||
|
||||
class BayesianGaussianMixture(BaseMixture):
|
||||
"""Variational Bayesian estimation of a Gaussian mixture.
|
||||
|
||||
This class allows to infer an approximate posterior distribution over the
|
||||
parameters of a Gaussian mixture distribution. The effective number of
|
||||
components can be inferred from the data.
|
||||
|
||||
This class implements two types of prior for the weights distribution: a
|
||||
finite mixture model with Dirichlet distribution and an infinite mixture
|
||||
model with the Dirichlet Process. In practice Dirichlet Process inference
|
||||
algorithm is approximated and uses a truncated distribution with a fixed
|
||||
maximum number of components (called the Stick-breaking representation).
|
||||
The number of components actually used almost always depends on the data.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
Read more in the :ref:`User Guide <bgmm>`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_components : int, default=1
|
||||
The number of mixture components. Depending on the data and the value
|
||||
of the `weight_concentration_prior` the model can decide to not use
|
||||
all the components by setting some component `weights_` to values very
|
||||
close to zero. The number of effective components is therefore smaller
|
||||
than n_components.
|
||||
|
||||
covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'
|
||||
String describing the type of covariance parameters to use.
|
||||
Must be one of:
|
||||
|
||||
- 'full' (each component has its own general covariance matrix),
|
||||
- 'tied' (all components share the same general covariance matrix),
|
||||
- 'diag' (each component has its own diagonal covariance matrix),
|
||||
- 'spherical' (each component has its own single variance).
|
||||
|
||||
tol : float, default=1e-3
|
||||
The convergence threshold. EM iterations will stop when the
|
||||
lower bound average gain on the likelihood (of the training data with
|
||||
respect to the model) is below this threshold.
|
||||
|
||||
reg_covar : float, default=1e-6
|
||||
Non-negative regularization added to the diagonal of covariance.
|
||||
Allows to assure that the covariance matrices are all positive.
|
||||
|
||||
max_iter : int, default=100
|
||||
The number of EM iterations to perform.
|
||||
|
||||
n_init : int, default=1
|
||||
The number of initializations to perform. The result with the highest
|
||||
lower bound value on the likelihood is kept.
|
||||
|
||||
init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \
|
||||
default='kmeans'
|
||||
The method used to initialize the weights, the means and the
|
||||
covariances. String must be one of:
|
||||
|
||||
- 'kmeans': responsibilities are initialized using kmeans.
|
||||
- 'k-means++': use the k-means++ method to initialize.
|
||||
- 'random': responsibilities are initialized randomly.
|
||||
- 'random_from_data': initial means are randomly selected data points.
|
||||
|
||||
.. versionchanged:: v1.1
|
||||
`init_params` now accepts 'random_from_data' and 'k-means++' as
|
||||
initialization methods.
|
||||
|
||||
weight_concentration_prior_type : {'dirichlet_process', 'dirichlet_distribution'}, \
|
||||
default='dirichlet_process'
|
||||
String describing the type of the weight concentration prior.
|
||||
|
||||
weight_concentration_prior : float or None, default=None
|
||||
The dirichlet concentration of each component on the weight
|
||||
distribution (Dirichlet). This is commonly called gamma in the
|
||||
literature. The higher concentration puts more mass in
|
||||
the center and will lead to more components being active, while a lower
|
||||
concentration parameter will lead to more mass at the edge of the
|
||||
mixture weights simplex. The value of the parameter must be greater
|
||||
than 0. If it is None, it's set to ``1. / n_components``.
|
||||
|
||||
mean_precision_prior : float or None, default=None
|
||||
The precision prior on the mean distribution (Gaussian).
|
||||
Controls the extent of where means can be placed. Larger
|
||||
values concentrate the cluster means around `mean_prior`.
|
||||
The value of the parameter must be greater than 0.
|
||||
If it is None, it is set to 1.
|
||||
|
||||
mean_prior : array-like, shape (n_features,), default=None
|
||||
The prior on the mean distribution (Gaussian).
|
||||
If it is None, it is set to the mean of X.
|
||||
|
||||
degrees_of_freedom_prior : float or None, default=None
|
||||
The prior of the number of degrees of freedom on the covariance
|
||||
distributions (Wishart). If it is None, it's set to `n_features`.
|
||||
|
||||
covariance_prior : float or array-like, default=None
|
||||
The prior on the covariance distribution (Wishart).
|
||||
If it is None, the emiprical covariance prior is initialized using the
|
||||
covariance of X. The shape depends on `covariance_type`::
|
||||
|
||||
(n_features, n_features) if 'full',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_features) if 'diag',
|
||||
float if 'spherical'
|
||||
|
||||
random_state : int, RandomState instance or None, default=None
|
||||
Controls the random seed given to the method chosen to initialize the
|
||||
parameters (see `init_params`).
|
||||
In addition, it controls the generation of random samples from the
|
||||
fitted distribution (see the method `sample`).
|
||||
Pass an int for reproducible output across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If 'warm_start' is True, the solution of the last fitting is used as
|
||||
initialization for the next call of fit(). This can speed up
|
||||
convergence when fit is called several times on similar problems.
|
||||
See :term:`the Glossary <warm_start>`.
|
||||
|
||||
verbose : int, default=0
|
||||
Enable verbose output. If 1 then it prints the current
|
||||
initialization and each iteration step. If greater than 1 then
|
||||
it prints also the log probability and the time needed
|
||||
for each step.
|
||||
|
||||
verbose_interval : int, default=10
|
||||
Number of iteration done before the next print.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
weights_ : array-like of shape (n_components,)
|
||||
The weights of each mixture components.
|
||||
|
||||
means_ : array-like of shape (n_components, n_features)
|
||||
The mean of each mixture component.
|
||||
|
||||
covariances_ : array-like
|
||||
The covariance of each mixture component.
|
||||
The shape depends on `covariance_type`::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
precisions_ : array-like
|
||||
The precision matrices for each component in the mixture. A precision
|
||||
matrix is the inverse of a covariance matrix. A covariance matrix is
|
||||
symmetric positive definite so the mixture of Gaussian can be
|
||||
equivalently parameterized by the precision matrices. Storing the
|
||||
precision matrices instead of the covariance matrices makes it more
|
||||
efficient to compute the log-likelihood of new samples at test time.
|
||||
The shape depends on ``covariance_type``::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
precisions_cholesky_ : array-like
|
||||
The cholesky decomposition of the precision matrices of each mixture
|
||||
component. A precision matrix is the inverse of a covariance matrix.
|
||||
A covariance matrix is symmetric positive definite so the mixture of
|
||||
Gaussian can be equivalently parameterized by the precision matrices.
|
||||
Storing the precision matrices instead of the covariance matrices makes
|
||||
it more efficient to compute the log-likelihood of new samples at test
|
||||
time. The shape depends on ``covariance_type``::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
converged_ : bool
|
||||
True when convergence of the best fit of inference was reached, False otherwise.
|
||||
|
||||
n_iter_ : int
|
||||
Number of step used by the best fit of inference to reach the
|
||||
convergence.
|
||||
|
||||
lower_bound_ : float
|
||||
Lower bound value on the model evidence (of the training data) of the
|
||||
best fit of inference.
|
||||
|
||||
lower_bounds_ : array-like of shape (`n_iter_`,)
|
||||
The list of lower bound values on the model evidence from each iteration
|
||||
of the best fit of inference.
|
||||
|
||||
weight_concentration_prior_ : tuple or float
|
||||
The dirichlet concentration of each component on the weight
|
||||
distribution (Dirichlet). The type depends on
|
||||
``weight_concentration_prior_type``::
|
||||
|
||||
(float, float) if 'dirichlet_process' (Beta parameters),
|
||||
float if 'dirichlet_distribution' (Dirichlet parameters).
|
||||
|
||||
The higher concentration puts more mass in
|
||||
the center and will lead to more components being active, while a lower
|
||||
concentration parameter will lead to more mass at the edge of the
|
||||
simplex.
|
||||
|
||||
weight_concentration_ : array-like of shape (n_components,)
|
||||
The dirichlet concentration of each component on the weight
|
||||
distribution (Dirichlet).
|
||||
|
||||
mean_precision_prior_ : float
|
||||
The precision prior on the mean distribution (Gaussian).
|
||||
Controls the extent of where means can be placed.
|
||||
Larger values concentrate the cluster means around `mean_prior`.
|
||||
If mean_precision_prior is set to None, `mean_precision_prior_` is set
|
||||
to 1.
|
||||
|
||||
mean_precision_ : array-like of shape (n_components,)
|
||||
The precision of each components on the mean distribution (Gaussian).
|
||||
|
||||
mean_prior_ : array-like of shape (n_features,)
|
||||
The prior on the mean distribution (Gaussian).
|
||||
|
||||
degrees_of_freedom_prior_ : float
|
||||
The prior of the number of degrees of freedom on the covariance
|
||||
distributions (Wishart).
|
||||
|
||||
degrees_of_freedom_ : array-like of shape (n_components,)
|
||||
The number of degrees of freedom of each components in the model.
|
||||
|
||||
covariance_prior_ : float or array-like
|
||||
The prior on the covariance distribution (Wishart).
|
||||
The shape depends on `covariance_type`::
|
||||
|
||||
(n_features, n_features) if 'full',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_features) if 'diag',
|
||||
float if 'spherical'
|
||||
|
||||
n_features_in_ : int
|
||||
Number of features seen during :term:`fit`.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
||||
Names of features seen during :term:`fit`. Defined only when `X`
|
||||
has feature names that are all strings.
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
See Also
|
||||
--------
|
||||
GaussianMixture : Finite Gaussian mixture fit with EM.
|
||||
|
||||
References
|
||||
----------
|
||||
|
||||
.. [1] `Bishop, Christopher M. (2006). "Pattern recognition and machine
|
||||
learning". Vol. 4 No. 4. New York: Springer.
|
||||
<https://www.springer.com/kr/book/9780387310732>`_
|
||||
|
||||
.. [2] `Hagai Attias. (2000). "A Variational Bayesian Framework for
|
||||
Graphical Models". In Advances in Neural Information Processing
|
||||
Systems 12.
|
||||
<https://citeseerx.ist.psu.edu/doc_view/pid/ee844fd96db7041a9681b5a18bff008912052c7e>`_
|
||||
|
||||
.. [3] `Blei, David M. and Michael I. Jordan. (2006). "Variational
|
||||
inference for Dirichlet process mixtures". Bayesian analysis 1.1
|
||||
<https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.mixture import BayesianGaussianMixture
|
||||
>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]])
|
||||
>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X)
|
||||
>>> bgm.means_
|
||||
array([[2.49 , 2.29],
|
||||
[8.45, 4.52 ]])
|
||||
>>> bgm.predict([[0, 0], [9, 3]])
|
||||
array([0, 1])
|
||||
"""
|
||||
|
||||
# Constraints on the constructor arguments. The entries of
# ``BaseMixture._parameter_constraints`` are copied first and the keys below
# are added on top of them.
_parameter_constraints: dict = {
    **BaseMixture._parameter_constraints,
    "covariance_type": [StrOptions({"spherical", "tied", "diag", "full"})],
    "weight_concentration_prior_type": [
        StrOptions({"dirichlet_process", "dirichlet_distribution"})
    ],
    # Either None or a real number in the open interval (0, +inf).
    "weight_concentration_prior": [
        None,
        Interval(Real, 0.0, None, closed="neither"),
    ],
    "mean_precision_prior": [None, Interval(Real, 0.0, None, closed="neither")],
    "mean_prior": [None, "array-like"],
    "degrees_of_freedom_prior": [None, Interval(Real, 0.0, None, closed="neither")],
    # None, an array-like, or a real number in the open interval (0, +inf).
    "covariance_prior": [
        None,
        "array-like",
        Interval(Real, 0.0, None, closed="neither"),
    ],
}
|
||||
|
||||
def __init__(
    self,
    *,
    n_components=1,
    covariance_type="full",
    tol=1e-3,
    reg_covar=1e-6,
    max_iter=100,
    n_init=1,
    init_params="kmeans",
    weight_concentration_prior_type="dirichlet_process",
    weight_concentration_prior=None,
    mean_precision_prior=None,
    mean_prior=None,
    degrees_of_freedom_prior=None,
    covariance_prior=None,
    random_state=None,
    warm_start=False,
    verbose=0,
    verbose_interval=10,
):
    # All arguments are keyword-only. The ones below are forwarded unchanged
    # to the parent class constructor.
    super().__init__(
        n_components=n_components,
        tol=tol,
        reg_covar=reg_covar,
        max_iter=max_iter,
        n_init=n_init,
        init_params=init_params,
        random_state=random_state,
        warm_start=warm_start,
        verbose=verbose,
        verbose_interval=verbose_interval,
    )

    # The remaining arguments are stored as given; nothing is validated or
    # converted here. The derived, trailing-underscore values are set by the
    # `_check_*` methods.
    self.covariance_type = covariance_type
    self.weight_concentration_prior_type = weight_concentration_prior_type
    self.weight_concentration_prior = weight_concentration_prior
    self.mean_precision_prior = mean_precision_prior
    self.mean_prior = mean_prior
    self.degrees_of_freedom_prior = degrees_of_freedom_prior
    self.covariance_prior = covariance_prior
|
||||
|
||||
def _check_parameters(self, X):
|
||||
"""Check that the parameters are well defined.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
"""
|
||||
self._check_weights_parameters()
|
||||
self._check_means_parameters(X)
|
||||
self._check_precision_parameters(X)
|
||||
self._checkcovariance_prior_parameter(X)
|
||||
|
||||
def _check_weights_parameters(self):
|
||||
"""Check the parameter of the Dirichlet distribution."""
|
||||
if self.weight_concentration_prior is None:
|
||||
self.weight_concentration_prior_ = 1.0 / self.n_components
|
||||
else:
|
||||
self.weight_concentration_prior_ = self.weight_concentration_prior
|
||||
|
||||
def _check_means_parameters(self, X):
|
||||
"""Check the parameters of the Gaussian distribution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
"""
|
||||
_, n_features = X.shape
|
||||
|
||||
if self.mean_precision_prior is None:
|
||||
self.mean_precision_prior_ = 1.0
|
||||
else:
|
||||
self.mean_precision_prior_ = self.mean_precision_prior
|
||||
|
||||
if self.mean_prior is None:
|
||||
self.mean_prior_ = X.mean(axis=0)
|
||||
else:
|
||||
self.mean_prior_ = check_array(
|
||||
self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False
|
||||
)
|
||||
_check_shape(self.mean_prior_, (n_features,), "means")
|
||||
|
||||
def _check_precision_parameters(self, X):
|
||||
"""Check the prior parameters of the precision distribution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
"""
|
||||
_, n_features = X.shape
|
||||
|
||||
if self.degrees_of_freedom_prior is None:
|
||||
self.degrees_of_freedom_prior_ = n_features
|
||||
elif self.degrees_of_freedom_prior > n_features - 1.0:
|
||||
self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior
|
||||
else:
|
||||
raise ValueError(
|
||||
"The parameter 'degrees_of_freedom_prior' "
|
||||
"should be greater than %d, but got %.3f."
|
||||
% (n_features - 1, self.degrees_of_freedom_prior)
|
||||
)
|
||||
|
||||
def _checkcovariance_prior_parameter(self, X):
|
||||
"""Check the `covariance_prior_`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
"""
|
||||
_, n_features = X.shape
|
||||
|
||||
if self.covariance_prior is None:
|
||||
self.covariance_prior_ = {
|
||||
"full": np.atleast_2d(np.cov(X.T)),
|
||||
"tied": np.atleast_2d(np.cov(X.T)),
|
||||
"diag": np.var(X, axis=0, ddof=1),
|
||||
"spherical": np.var(X, axis=0, ddof=1).mean(),
|
||||
}[self.covariance_type]
|
||||
|
||||
elif self.covariance_type in ["full", "tied"]:
|
||||
self.covariance_prior_ = check_array(
|
||||
self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False
|
||||
)
|
||||
_check_shape(
|
||||
self.covariance_prior_,
|
||||
(n_features, n_features),
|
||||
"%s covariance_prior" % self.covariance_type,
|
||||
)
|
||||
_check_precision_matrix(self.covariance_prior_, self.covariance_type)
|
||||
elif self.covariance_type == "diag":
|
||||
self.covariance_prior_ = check_array(
|
||||
self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False
|
||||
)
|
||||
_check_shape(
|
||||
self.covariance_prior_,
|
||||
(n_features,),
|
||||
"%s covariance_prior" % self.covariance_type,
|
||||
)
|
||||
_check_precision_positivity(self.covariance_prior_, self.covariance_type)
|
||||
# spherical case
|
||||
else:
|
||||
self.covariance_prior_ = self.covariance_prior
|
||||
|
||||
def _initialize(self, X, resp):
    """Initialise the mixture parameters from responsibilities.

    The Gaussian statistics of `X` under `resp` are computed once and then
    used to update, in order, the weight, mean and precision parameters.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    resp : array-like of shape (n_samples, n_components)
    """
    gaussian_stats = _estimate_gaussian_parameters(
        X, resp, self.reg_covar, self.covariance_type
    )
    counts, centers, scatters = gaussian_stats

    self._estimate_weights(counts)
    self._estimate_means(counts, centers)
    self._estimate_precisions(counts, centers, scatters)
|
||||
|
||||
def _estimate_weights(self, nk):
|
||||
"""Estimate the parameters of the Dirichlet distribution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nk : array-like of shape (n_components,)
|
||||
"""
|
||||
if self.weight_concentration_prior_type == "dirichlet_process":
|
||||
# For dirichlet process weight_concentration will be a tuple
|
||||
# containing the two parameters of the beta distribution
|
||||
self.weight_concentration_ = (
|
||||
1.0 + nk,
|
||||
(
|
||||
self.weight_concentration_prior_
|
||||
+ np.hstack((np.cumsum(nk[::-1])[-2::-1], 0))
|
||||
),
|
||||
)
|
||||
else:
|
||||
# case Variational Gaussian mixture with dirichlet distribution
|
||||
self.weight_concentration_ = self.weight_concentration_prior_ + nk
|
||||
|
||||
def _estimate_means(self, nk, xk):
|
||||
"""Estimate the parameters of the Gaussian distribution.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
xk : array-like of shape (n_components, n_features)
|
||||
"""
|
||||
self.mean_precision_ = self.mean_precision_prior_ + nk
|
||||
self.means_ = (
|
||||
self.mean_precision_prior_ * self.mean_prior_ + nk[:, np.newaxis] * xk
|
||||
) / self.mean_precision_[:, np.newaxis]
|
||||
|
||||
def _estimate_precisions(self, nk, xk, sk):
    """Update the Wishart parameters and the precision Cholesky factors.

    The estimator matching `covariance_type` is run first, then
    `precisions_cholesky_` is recomputed from the resulting
    `covariances_`.

    Parameters
    ----------
    nk : array-like of shape (n_components,)

    xk : array-like of shape (n_components, n_features)

    sk : array-like
        The shape depends on `covariance_type`:
        'full' : (n_components, n_features, n_features)
        'tied' : (n_features, n_features)
        'diag' : (n_components, n_features)
        'spherical' : (n_components,)
    """
    wishart_estimators = {
        "full": self._estimate_wishart_full,
        "tied": self._estimate_wishart_tied,
        "diag": self._estimate_wishart_diag,
        "spherical": self._estimate_wishart_spherical,
    }
    estimate_wishart = wishart_estimators[self.covariance_type]
    estimate_wishart(nk, xk, sk)

    self.precisions_cholesky_ = _compute_precision_cholesky(
        self.covariances_, self.covariance_type
    )
|
||||
|
||||
def _estimate_wishart_full(self, nk, xk, sk):
|
||||
"""Estimate the full Wishart distribution parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
xk : array-like of shape (n_components, n_features)
|
||||
|
||||
sk : array-like of shape (n_components, n_features, n_features)
|
||||
"""
|
||||
_, n_features = xk.shape
|
||||
|
||||
# Warning : in some Bishop book, there is a typo on the formula 10.63
|
||||
# `degrees_of_freedom_k = degrees_of_freedom_0 + Nk` is
|
||||
# the correct formula
|
||||
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk
|
||||
|
||||
self.covariances_ = np.empty((self.n_components, n_features, n_features))
|
||||
|
||||
for k in range(self.n_components):
|
||||
diff = xk[k] - self.mean_prior_
|
||||
self.covariances_[k] = (
|
||||
self.covariance_prior_
|
||||
+ nk[k] * sk[k]
|
||||
+ nk[k]
|
||||
* self.mean_precision_prior_
|
||||
/ self.mean_precision_[k]
|
||||
* np.outer(diff, diff)
|
||||
)
|
||||
|
||||
# Contrary to the original bishop book, we normalize the covariances
|
||||
self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis, np.newaxis]
|
||||
|
||||
def _estimate_wishart_tied(self, nk, xk, sk):
|
||||
"""Estimate the tied Wishart distribution parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
xk : array-like of shape (n_components, n_features)
|
||||
|
||||
sk : array-like of shape (n_features, n_features)
|
||||
"""
|
||||
_, n_features = xk.shape
|
||||
|
||||
# Warning : in some Bishop book, there is a typo on the formula 10.63
|
||||
# `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
|
||||
# is the correct formula
|
||||
self.degrees_of_freedom_ = (
|
||||
self.degrees_of_freedom_prior_ + nk.sum() / self.n_components
|
||||
)
|
||||
|
||||
diff = xk - self.mean_prior_
|
||||
self.covariances_ = (
|
||||
self.covariance_prior_
|
||||
+ sk * nk.sum() / self.n_components
|
||||
+ self.mean_precision_prior_
|
||||
/ self.n_components
|
||||
* np.dot((nk / self.mean_precision_) * diff.T, diff)
|
||||
)
|
||||
|
||||
# Contrary to the original bishop book, we normalize the covariances
|
||||
self.covariances_ /= self.degrees_of_freedom_
|
||||
|
||||
def _estimate_wishart_diag(self, nk, xk, sk):
|
||||
"""Estimate the diag Wishart distribution parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
xk : array-like of shape (n_components, n_features)
|
||||
|
||||
sk : array-like of shape (n_components, n_features)
|
||||
"""
|
||||
_, n_features = xk.shape
|
||||
|
||||
# Warning : in some Bishop book, there is a typo on the formula 10.63
|
||||
# `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
|
||||
# is the correct formula
|
||||
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk
|
||||
|
||||
diff = xk - self.mean_prior_
|
||||
self.covariances_ = self.covariance_prior_ + nk[:, np.newaxis] * (
|
||||
sk
|
||||
+ (self.mean_precision_prior_ / self.mean_precision_)[:, np.newaxis]
|
||||
* np.square(diff)
|
||||
)
|
||||
|
||||
# Contrary to the original bishop book, we normalize the covariances
|
||||
self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis]
|
||||
|
||||
def _estimate_wishart_spherical(self, nk, xk, sk):
|
||||
"""Estimate the spherical Wishart distribution parameters.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
xk : array-like of shape (n_components, n_features)
|
||||
|
||||
sk : array-like of shape (n_components,)
|
||||
"""
|
||||
_, n_features = xk.shape
|
||||
|
||||
# Warning : in some Bishop book, there is a typo on the formula 10.63
|
||||
# `degrees_of_freedom_k = degrees_of_freedom_0 + Nk`
|
||||
# is the correct formula
|
||||
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk
|
||||
|
||||
diff = xk - self.mean_prior_
|
||||
self.covariances_ = self.covariance_prior_ + nk * (
|
||||
sk
|
||||
+ self.mean_precision_prior_
|
||||
/ self.mean_precision_
|
||||
* np.mean(np.square(diff), 1)
|
||||
)
|
||||
|
||||
# Contrary to the original bishop book, we normalize the covariances
|
||||
self.covariances_ /= self.degrees_of_freedom_
|
||||
|
||||
def _m_step(self, X, log_resp):
    """M step.

    The responsibilities are recovered with ``np.exp(log_resp)`` and used
    to re-estimate the weight, mean and precision parameters, in that
    order.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    log_resp : array-like of shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in X.
    """
    # The unpacked values are not used; the unpacking itself requires
    # `X.shape` to have exactly two entries.
    _n_samples, _ = X.shape

    responsibilities = np.exp(log_resp)
    counts, centers, scatters = _estimate_gaussian_parameters(
        X, responsibilities, self.reg_covar, self.covariance_type
    )

    self._estimate_weights(counts)
    self._estimate_means(counts, centers)
    self._estimate_precisions(counts, centers, scatters)
|
||||
|
||||
def _estimate_log_weights(self):
|
||||
if self.weight_concentration_prior_type == "dirichlet_process":
|
||||
digamma_sum = digamma(
|
||||
self.weight_concentration_[0] + self.weight_concentration_[1]
|
||||
)
|
||||
digamma_a = digamma(self.weight_concentration_[0])
|
||||
digamma_b = digamma(self.weight_concentration_[1])
|
||||
return (
|
||||
digamma_a
|
||||
- digamma_sum
|
||||
+ np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1]))
|
||||
)
|
||||
else:
|
||||
# case Variational Gaussian mixture with dirichlet distribution
|
||||
return digamma(self.weight_concentration_) - digamma(
|
||||
np.sum(self.weight_concentration_)
|
||||
)
|
||||
|
||||
def _estimate_log_prob(self, X):
    """Return the log-probability term of each sample under each component.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
    """
    _, n_features = X.shape
    dof = self.degrees_of_freedom_

    # We remove `n_features * np.log(self.degrees_of_freedom_)` because
    # the precision matrix is normalized
    log_gauss = _estimate_log_gaussian_prob(
        X, self.means_, self.precisions_cholesky_, self.covariance_type
    ) - 0.5 * n_features * np.log(dof)

    # One row per feature index, broadcast against the degrees of freedom.
    feature_index = np.arange(0, n_features)[:, np.newaxis]
    digamma_terms = digamma(0.5 * (dof - feature_index))
    log_lambda = n_features * np.log(2.0) + np.sum(digamma_terms, 0)

    return log_gauss + 0.5 * (log_lambda - n_features / self.mean_precision_)
|
||||
|
||||
def _compute_lower_bound(self, log_resp, log_prob_norm):
    """Estimate the lower bound of the model.

    The lower bound on the likelihood (of the training data with respect to
    the model) is used to detect the convergence and has to increase at
    each iteration.

    Parameters
    ----------
    log_resp : array, shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in X.

    log_prob_norm : float
        Logarithm of the probability of each sample in X. This argument is
        not read by the body of this method.

    Returns
    -------
    lower_bound : float
    """
    # Contrary to the original formula, we have done some simplification
    # and removed all the constant terms.
    (n_features,) = self.mean_prior_.shape

    # We removed `.5 * n_features * np.log(self.degrees_of_freedom_)`
    # because the precision matrix is normalized.
    log_det_precisions_chol = _compute_log_det_cholesky(
        self.precisions_cholesky_, self.covariance_type, n_features
    ) - 0.5 * n_features * np.log(self.degrees_of_freedom_)

    # For 'tied' the single Wishart term is multiplied by the number of
    # components; for the other types the per-component terms are summed.
    if self.covariance_type == "tied":
        log_wishart = self.n_components * np.float64(
            _log_wishart_norm(
                self.degrees_of_freedom_, log_det_precisions_chol, n_features
            )
        )
    else:
        log_wishart = np.sum(
            _log_wishart_norm(
                self.degrees_of_freedom_, log_det_precisions_chol, n_features
            )
        )

    # Normaliser of the weight distribution: a sum of log-beta terms for the
    # 'dirichlet_process' prior, `_log_dirichlet_norm` otherwise.
    if self.weight_concentration_prior_type == "dirichlet_process":
        log_norm_weight = -np.sum(
            betaln(self.weight_concentration_[0], self.weight_concentration_[1])
        )
    else:
        log_norm_weight = _log_dirichlet_norm(self.weight_concentration_)

    # The first term sums -resp * log(resp) over all samples and components.
    return (
        -np.sum(np.exp(log_resp) * log_resp)
        - log_wishart
        - log_norm_weight
        - 0.5 * n_features * np.sum(np.log(self.mean_precision_))
    )
|
||||
|
||||
def _get_parameters(self):
|
||||
return (
|
||||
self.weight_concentration_,
|
||||
self.mean_precision_,
|
||||
self.means_,
|
||||
self.degrees_of_freedom_,
|
||||
self.covariances_,
|
||||
self.precisions_cholesky_,
|
||||
)
|
||||
|
||||
def _set_parameters(self, params):
|
||||
(
|
||||
self.weight_concentration_,
|
||||
self.mean_precision_,
|
||||
self.means_,
|
||||
self.degrees_of_freedom_,
|
||||
self.covariances_,
|
||||
self.precisions_cholesky_,
|
||||
) = params
|
||||
|
||||
# Weights computation
|
||||
if self.weight_concentration_prior_type == "dirichlet_process":
|
||||
weight_dirichlet_sum = (
|
||||
self.weight_concentration_[0] + self.weight_concentration_[1]
|
||||
)
|
||||
tmp = self.weight_concentration_[1] / weight_dirichlet_sum
|
||||
self.weights_ = (
|
||||
self.weight_concentration_[0]
|
||||
/ weight_dirichlet_sum
|
||||
* np.hstack((1, np.cumprod(tmp[:-1])))
|
||||
)
|
||||
self.weights_ /= np.sum(self.weights_)
|
||||
else:
|
||||
self.weights_ = self.weight_concentration_ / np.sum(
|
||||
self.weight_concentration_
|
||||
)
|
||||
|
||||
# Precisions matrices computation
|
||||
if self.covariance_type == "full":
|
||||
self.precisions_ = np.array(
|
||||
[
|
||||
np.dot(prec_chol, prec_chol.T)
|
||||
for prec_chol in self.precisions_cholesky_
|
||||
]
|
||||
)
|
||||
|
||||
elif self.covariance_type == "tied":
|
||||
self.precisions_ = np.dot(
|
||||
self.precisions_cholesky_, self.precisions_cholesky_.T
|
||||
)
|
||||
else:
|
||||
self.precisions_ = self.precisions_cholesky_**2
|
||||
@@ -0,0 +1,934 @@
|
||||
"""Gaussian Mixture Model."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
from scipy import linalg
|
||||
|
||||
from ..utils import check_array
|
||||
from ..utils._param_validation import StrOptions
|
||||
from ..utils.extmath import row_norms
|
||||
from ._base import BaseMixture, _check_shape
|
||||
|
||||
###############################################################################
|
||||
# Gaussian mixture shape checkers used by the GaussianMixture class
|
||||
|
||||
|
||||
def _check_weights(weights, n_components):
    """Check the user provided 'weights'.

    The weights are converted with `check_array`, must have shape
    ``(n_components,)``, must all lie in ``[0, 1]`` and must sum to 1
    within an absolute tolerance of 1e-6 (float32) or 1e-8 (otherwise).

    Parameters
    ----------
    weights : array-like of shape (n_components,)
        The proportions of components of each mixture.

    n_components : int
        Number of components.

    Returns
    -------
    weights : array, shape (n_components,)

    Raises
    ------
    ValueError
        If the shape is wrong, a weight is outside ``[0, 1]`` or the
        weights are not normalized.
    """
    weights = check_array(weights, dtype=[np.float64, np.float32], ensure_2d=False)
    _check_shape(weights, (n_components,), "weights")

    # check range
    if np.any(np.less(weights, 0.0)) or np.any(np.greater(weights, 1.0)):
        # The arguments follow the order of the placeholders: max first,
        # then min.
        raise ValueError(
            "The parameter 'weights' should be in the range "
            "[0, 1], but got max value %.5f, min value %.5f"
            % (np.max(weights), np.min(weights))
        )

    # check normalization
    atol = 1e-6 if weights.dtype == np.float32 else 1e-8
    if not np.allclose(np.abs(1.0 - np.sum(weights)), 0.0, atol=atol):
        raise ValueError(
            "The parameter 'weights' should be normalized, but got sum(weights) = %.5f"
            % np.sum(weights)
        )
    return weights
|
||||
|
||||
|
||||
def _check_means(means, n_components, n_features):
    """Validate the provided 'means'.

    The input is converted with `check_array` and must have shape
    ``(n_components, n_features)``.

    Parameters
    ----------
    means : array-like of shape (n_components, n_features)
        The centers of the current components.

    n_components : int
        Number of components.

    n_features : int
        Number of features.

    Returns
    -------
    means : array, (n_components, n_features)
    """
    expected_shape = (n_components, n_features)
    validated = check_array(means, dtype=[np.float64, np.float32], ensure_2d=False)
    _check_shape(validated, expected_shape, "means")
    return validated
|
||||
|
||||
|
||||
def _check_precision_positivity(precision, covariance_type):
|
||||
"""Check a precision vector is positive-definite."""
|
||||
if np.any(np.less_equal(precision, 0.0)):
|
||||
raise ValueError("'%s precision' should be positive" % covariance_type)
|
||||
|
||||
|
||||
def _check_precision_matrix(precision, covariance_type):
|
||||
"""Check a precision matrix is symmetric and positive-definite."""
|
||||
if not (
|
||||
np.allclose(precision, precision.T) and np.all(linalg.eigvalsh(precision) > 0.0)
|
||||
):
|
||||
raise ValueError(
|
||||
"'%s precision' should be symmetric, positive-definite" % covariance_type
|
||||
)
|
||||
|
||||
|
||||
def _check_precisions_full(precisions, covariance_type):
    """Apply `_check_precision_matrix` to each matrix in `precisions`."""
    for component_precision in precisions:
        _check_precision_matrix(component_precision, covariance_type)
|
||||
|
||||
|
||||
def _check_precisions(precisions, covariance_type, n_components, n_features):
    """Validate user provided precisions.

    The input is converted with `check_array`, its shape is compared with
    the shape expected for `covariance_type`, and the matching value check
    is applied.

    Parameters
    ----------
    precisions : array-like
        'full' : shape of (n_components, n_features, n_features)
        'tied' : shape of (n_features, n_features)
        'diag' : shape of (n_components, n_features)
        'spherical' : shape of (n_components,)

    covariance_type : str

    n_components : int
        Number of components.

    n_features : int
        Number of features.

    Returns
    -------
    precisions : array
    """
    # Only the 'full' layout is allowed more than two dimensions.
    validated = check_array(
        precisions,
        dtype=[np.float64, np.float32],
        ensure_2d=False,
        allow_nd=covariance_type == "full",
    )

    expected_shapes = {
        "full": (n_components, n_features, n_features),
        "tied": (n_features, n_features),
        "diag": (n_components, n_features),
        "spherical": (n_components,),
    }
    _check_shape(
        validated, expected_shapes[covariance_type], "%s precision" % covariance_type
    )

    value_checks = {
        "full": _check_precisions_full,
        "tied": _check_precision_matrix,
        "diag": _check_precision_positivity,
        "spherical": _check_precision_positivity,
    }
    check_values = value_checks[covariance_type]
    check_values(validated, covariance_type)
    return validated
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Gaussian mixture parameters estimators (used by the M-Step)
|
||||
|
||||
|
||||
def _estimate_gaussian_covariances_full(resp, X, nk, means, reg_covar):
|
||||
"""Estimate the full covariance matrices.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
resp : array-like of shape (n_samples, n_components)
|
||||
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
means : array-like of shape (n_components, n_features)
|
||||
|
||||
reg_covar : float
|
||||
|
||||
Returns
|
||||
-------
|
||||
covariances : array, shape (n_components, n_features, n_features)
|
||||
The covariance matrix of the current components.
|
||||
"""
|
||||
n_components, n_features = means.shape
|
||||
covariances = np.empty((n_components, n_features, n_features), dtype=X.dtype)
|
||||
for k in range(n_components):
|
||||
diff = X - means[k]
|
||||
covariances[k] = np.dot(resp[:, k] * diff.T, diff) / nk[k]
|
||||
covariances[k].flat[:: n_features + 1] += reg_covar
|
||||
return covariances
|
||||
|
||||
|
||||
def _estimate_gaussian_covariances_tied(resp, X, nk, means, reg_covar):
|
||||
"""Estimate the tied covariance matrix.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
resp : array-like of shape (n_samples, n_components)
|
||||
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
means : array-like of shape (n_components, n_features)
|
||||
|
||||
reg_covar : float
|
||||
|
||||
Returns
|
||||
-------
|
||||
covariance : array, shape (n_features, n_features)
|
||||
The tied covariance matrix of the components.
|
||||
"""
|
||||
avg_X2 = np.dot(X.T, X)
|
||||
avg_means2 = np.dot(nk * means.T, means)
|
||||
covariance = avg_X2 - avg_means2
|
||||
covariance /= nk.sum()
|
||||
covariance.flat[:: len(covariance) + 1] += reg_covar
|
||||
return covariance
|
||||
|
||||
|
||||
def _estimate_gaussian_covariances_diag(resp, X, nk, means, reg_covar):
|
||||
"""Estimate the diagonal covariance vectors.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
responsibilities : array-like of shape (n_samples, n_components)
|
||||
|
||||
X : array-like of shape (n_samples, n_features)
|
||||
|
||||
nk : array-like of shape (n_components,)
|
||||
|
||||
means : array-like of shape (n_components, n_features)
|
||||
|
||||
reg_covar : float
|
||||
|
||||
Returns
|
||||
-------
|
||||
covariances : array, shape (n_components, n_features)
|
||||
The covariance vector of the current components.
|
||||
"""
|
||||
avg_X2 = np.dot(resp.T, X * X) / nk[:, np.newaxis]
|
||||
avg_means2 = means**2
|
||||
return avg_X2 - avg_means2 + reg_covar
|
||||
|
||||
|
||||
def _estimate_gaussian_covariances_spherical(resp, X, nk, means, reg_covar):
    """Estimate the spherical variance values.

    Each value is the mean over features of the diagonal covariance
    estimate of the component.

    Parameters
    ----------
    resp : array-like of shape (n_samples, n_components)

    X : array-like of shape (n_samples, n_features)

    nk : array-like of shape (n_components,)

    means : array-like of shape (n_components, n_features)

    reg_covar : float

    Returns
    -------
    variances : array, shape (n_components,)
        The variance values of each components.
    """
    diagonal_variances = _estimate_gaussian_covariances_diag(
        resp, X, nk, means, reg_covar
    )
    return diagonal_variances.mean(1)
|
||||
|
||||
|
||||
def _estimate_gaussian_parameters(X, resp, reg_covar, covariance_type):
    """Estimate the Gaussian distribution parameters.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        The input data array.

    resp : array-like of shape (n_samples, n_components)
        The responsibilities for each data sample in X.

    reg_covar : float
        The regularization added to the diagonal of the covariance matrices.

    covariance_type : {'full', 'tied', 'diag', 'spherical'}
        The type of precision matrices.

    Returns
    -------
    nk : array-like of shape (n_components,)
        The numbers of data samples in the current components.

    means : array-like of shape (n_components, n_features)
        The centers of the current components.

    covariances : array-like
        The covariance matrix of the current components.
        The shape depends on the covariance_type.
    """
    # Ten machine epsilons are added to the counts before they are used as
    # divisors.
    nk = resp.sum(axis=0) + 10 * np.finfo(resp.dtype).eps
    means = np.dot(resp.T, X) / nk[:, np.newaxis]

    covariance_estimators = {
        "full": _estimate_gaussian_covariances_full,
        "tied": _estimate_gaussian_covariances_tied,
        "diag": _estimate_gaussian_covariances_diag,
        "spherical": _estimate_gaussian_covariances_spherical,
    }
    estimate_covariances = covariance_estimators[covariance_type]
    covariances = estimate_covariances(resp, X, nk, means, reg_covar)

    return nk, means, covariances
|
||||
|
||||
|
||||
def _compute_precision_cholesky(covariances, covariance_type):
|
||||
"""Compute the Cholesky decomposition of the precisions.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
covariances : array-like
|
||||
The covariance matrix of the current components.
|
||||
The shape depends of the covariance_type.
|
||||
|
||||
covariance_type : {'full', 'tied', 'diag', 'spherical'}
|
||||
The type of precision matrices.
|
||||
|
||||
Returns
|
||||
-------
|
||||
precisions_cholesky : array-like
|
||||
The cholesky decomposition of sample precisions of the current
|
||||
components. The shape depends of the covariance_type.
|
||||
"""
|
||||
estimate_precision_error_message = (
|
||||
"Fitting the mixture model failed because some components have "
|
||||
"ill-defined empirical covariance (for instance caused by singleton "
|
||||
"or collapsed samples). Try to decrease the number of components, "
|
||||
"increase reg_covar, or scale the input data."
|
||||
)
|
||||
dtype = covariances.dtype
|
||||
if dtype == np.float32:
|
||||
estimate_precision_error_message += (
|
||||
" The numerical accuracy can also be improved by passing float64"
|
||||
" data instead of float32."
|
||||
)
|
||||
|
||||
if covariance_type == "full":
|
||||
n_components, n_features, _ = covariances.shape
|
||||
precisions_chol = np.empty((n_components, n_features, n_features), dtype=dtype)
|
||||
for k, covariance in enumerate(covariances):
|
||||
try:
|
||||
cov_chol = linalg.cholesky(covariance, lower=True)
|
||||
except linalg.LinAlgError:
|
||||
raise ValueError(estimate_precision_error_message)
|
||||
precisions_chol[k] = linalg.solve_triangular(
|
||||
cov_chol, np.eye(n_features, dtype=dtype), lower=True
|
||||
).T
|
||||
elif covariance_type == "tied":
|
||||
_, n_features = covariances.shape
|
||||
try:
|
||||
cov_chol = linalg.cholesky(covariances, lower=True)
|
||||
except linalg.LinAlgError:
|
||||
raise ValueError(estimate_precision_error_message)
|
||||
precisions_chol = linalg.solve_triangular(
|
||||
cov_chol, np.eye(n_features, dtype=dtype), lower=True
|
||||
).T
|
||||
else:
|
||||
if np.any(np.less_equal(covariances, 0.0)):
|
||||
raise ValueError(estimate_precision_error_message)
|
||||
precisions_chol = 1.0 / np.sqrt(covariances)
|
||||
return precisions_chol
|
||||
|
||||
|
||||
def _flipudlr(array):
|
||||
"""Reverse the rows and columns of an array."""
|
||||
return np.flipud(np.fliplr(array))
|
||||
|
||||
|
||||
def _compute_precision_cholesky_from_precisions(precisions, covariance_type):
    r"""Derive the `precisions_cholesky_` factors directly from precisions.

    :func:`_compute_precision_cholesky` stores, for each Gaussian component, an
    upper-triangular matrix $U$ such that the precision matrix factorizes as
    $\Lambda = UU^T$.

    A regular Cholesky decomposition produces a lower-triangular factor, so the
    precision matrix is first permuted by reversing its rows and columns. This
    is the similarity transformation by the exchange matrix $J$, whose entries
    are 1 on the anti-diagonal and 0 elsewhere. The Cholesky decomposition of
    the permuted matrix is $J\Lambda J = LL^T$ with $L$ lower-triangular.
    Because $\Lambda = UU^T$ and $J = J^{-1} = J^T$, the `precisions_cholesky_`
    of each component can be expressed as $JLJ$.

    Refer to #26415 for details.

    Parameters
    ----------
    precisions : array-like
        The precision matrix of the current components.
        The shape depends on the covariance_type.

    covariance_type : {'full', 'tied', 'diag', 'spherical'}
        The type of precision matrices.

    Returns
    -------
    precisions_cholesky : array-like
        The cholesky decomposition of sample precisions of the current
        components. The shape depends on the covariance_type.
    """

    def _upper_factor(precision):
        # Factorize the row/column-reversed matrix, then undo the reversal so
        # that the lower-triangular factor becomes upper-triangular.
        lower = linalg.cholesky(_flipudlr(precision), lower=True)
        return _flipudlr(lower)

    if covariance_type == "full":
        # One matrix per component.
        return np.array([_upper_factor(precision) for precision in precisions])
    if covariance_type == "tied":
        # A single matrix shared by all components.
        return _upper_factor(precisions)
    # Remaining types hold per-entry precisions: element-wise square root.
    return np.sqrt(precisions)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Gaussian mixture probability estimators
|
||||
def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features):
|
||||
"""Compute the log-det of the cholesky decomposition of matrices.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
matrix_chol : array-like
|
||||
Cholesky decompositions of the matrices.
|
||||
'full' : shape of (n_components, n_features, n_features)
|
||||
'tied' : shape of (n_features, n_features)
|
||||
'diag' : shape of (n_components, n_features)
|
||||
'spherical' : shape of (n_components,)
|
||||
|
||||
covariance_type : {'full', 'tied', 'diag', 'spherical'}
|
||||
|
||||
n_features : int
|
||||
Number of features.
|
||||
|
||||
Returns
|
||||
-------
|
||||
log_det_precision_chol : array-like of shape (n_components,)
|
||||
The determinant of the precision matrix for each component.
|
||||
"""
|
||||
if covariance_type == "full":
|
||||
n_components, _, _ = matrix_chol.shape
|
||||
log_det_chol = np.sum(
|
||||
np.log(matrix_chol.reshape(n_components, -1)[:, :: n_features + 1]), axis=1
|
||||
)
|
||||
|
||||
elif covariance_type == "tied":
|
||||
log_det_chol = np.sum(np.log(np.diag(matrix_chol)))
|
||||
|
||||
elif covariance_type == "diag":
|
||||
log_det_chol = np.sum(np.log(matrix_chol), axis=1)
|
||||
|
||||
else:
|
||||
log_det_chol = n_features * np.log(matrix_chol)
|
||||
|
||||
return log_det_chol
|
||||
|
||||
|
||||
def _estimate_log_gaussian_prob(X, means, precisions_chol, covariance_type):
    """Estimate the log Gaussian probability of each sample under each component.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    means : array-like of shape (n_components, n_features)

    precisions_chol : array-like
        Cholesky decompositions of the precision matrices.
        'full' : shape of (n_components, n_features, n_features)
        'tied' : shape of (n_features, n_features)
        'diag' : shape of (n_components, n_features)
        'spherical' : shape of (n_components,)

    covariance_type : {'full', 'tied', 'diag', 'spherical'}

    Returns
    -------
    log_prob : array, shape (n_samples, n_components)
    """
    n_samples, n_features = X.shape
    n_components, _ = means.shape

    # The log-determinant of a Cholesky factor is half the log-determinant of
    # the matrix it factorizes:
    #     log det(precision_chol) = 0.5 * log det(precision)
    log_det = _compute_log_det_cholesky(precisions_chol, covariance_type, n_features)

    def _squared_distance(mu, prec_chol):
        # Squared norm of (X - mu) after projection by the Cholesky factor.
        projected = np.dot(X, prec_chol) - np.dot(mu, prec_chol)
        return np.sum(np.square(projected), axis=1)

    if covariance_type == "full":
        log_prob = np.empty((n_samples, n_components), dtype=X.dtype)
        for k, (mu, prec_chol) in enumerate(zip(means, precisions_chol)):
            log_prob[:, k] = _squared_distance(mu, prec_chol)

    elif covariance_type == "tied":
        log_prob = np.empty((n_samples, n_components), dtype=X.dtype)
        for k, mu in enumerate(means):
            log_prob[:, k] = _squared_distance(mu, precisions_chol)

    elif covariance_type == "diag":
        precisions = precisions_chol**2
        # Expanded form of sum_j precision_kj * (x_j - mu_kj) ** 2.
        mean_term = np.sum((means**2 * precisions), 1)
        cross_term = 2.0 * np.dot(X, (means * precisions).T)
        data_term = np.dot(X**2, precisions.T)
        log_prob = mean_term - cross_term + data_term

    elif covariance_type == "spherical":
        precisions = precisions_chol**2
        # Same expansion with one scalar precision per component.
        mean_term = np.sum(means**2, 1) * precisions
        cross_term = 2 * np.dot(X, means.T * precisions)
        data_term = np.outer(row_norms(X, squared=True), precisions)
        log_prob = mean_term - cross_term + data_term

    # Because the Cholesky factor of the precision is used,
    # `- 0.5 * log_det_precision` becomes `+ log_det_precision_chol`.
    log_two_pi = np.log(2 * np.pi).astype(X.dtype)
    return -0.5 * (n_features * log_two_pi + log_prob) + log_det
|
||||
|
||||
|
||||
class GaussianMixture(BaseMixture):
|
||||
"""Gaussian Mixture.
|
||||
|
||||
Representation of a Gaussian mixture model probability distribution.
|
||||
This class allows to estimate the parameters of a Gaussian mixture
|
||||
distribution.
|
||||
|
||||
Read more in the :ref:`User Guide <gmm>`.
|
||||
|
||||
.. versionadded:: 0.18
|
||||
|
||||
Parameters
|
||||
----------
|
||||
n_components : int, default=1
|
||||
The number of mixture components.
|
||||
|
||||
covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full'
|
||||
String describing the type of covariance parameters to use.
|
||||
Must be one of:
|
||||
|
||||
- 'full': each component has its own general covariance matrix.
|
||||
- 'tied': all components share the same general covariance matrix.
|
||||
- 'diag': each component has its own diagonal covariance matrix.
|
||||
- 'spherical': each component has its own single variance.
|
||||
|
||||
For an example of using `covariance_type`, refer to
|
||||
:ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py`.
|
||||
|
||||
tol : float, default=1e-3
|
||||
The convergence threshold. EM iterations will stop when the
|
||||
lower bound average gain is below this threshold.
|
||||
|
||||
reg_covar : float, default=1e-6
|
||||
Non-negative regularization added to the diagonal of covariance.
|
||||
Allows to assure that the covariance matrices are all positive.
|
||||
|
||||
max_iter : int, default=100
|
||||
The number of EM iterations to perform.
|
||||
|
||||
n_init : int, default=1
|
||||
The number of initializations to perform. The best results are kept.
|
||||
|
||||
init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \
|
||||
default='kmeans'
|
||||
The method used to initialize the weights, the means and the
|
||||
precisions.
|
||||
String must be one of:
|
||||
|
||||
- 'kmeans' : responsibilities are initialized using kmeans.
|
||||
- 'k-means++' : use the k-means++ method to initialize.
|
||||
- 'random' : responsibilities are initialized randomly.
|
||||
- 'random_from_data' : initial means are randomly selected data points.
|
||||
|
||||
.. versionchanged:: v1.1
|
||||
`init_params` now accepts 'random_from_data' and 'k-means++' as
|
||||
initialization methods.
|
||||
|
||||
weights_init : array-like of shape (n_components, ), default=None
|
||||
The user-provided initial weights.
|
||||
If it is None, weights are initialized using the `init_params` method.
|
||||
|
||||
means_init : array-like of shape (n_components, n_features), default=None
|
||||
The user-provided initial means,
|
||||
If it is None, means are initialized using the `init_params` method.
|
||||
|
||||
precisions_init : array-like, default=None
|
||||
The user-provided initial precisions (inverse of the covariance
|
||||
matrices).
|
||||
If it is None, precisions are initialized using the 'init_params'
|
||||
method.
|
||||
The shape depends on 'covariance_type'::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
random_state : int, RandomState instance or None, default=None
|
||||
Controls the random seed given to the method chosen to initialize the
|
||||
parameters (see `init_params`).
|
||||
In addition, it controls the generation of random samples from the
|
||||
fitted distribution (see the method `sample`).
|
||||
Pass an int for reproducible output across multiple function calls.
|
||||
See :term:`Glossary <random_state>`.
|
||||
|
||||
warm_start : bool, default=False
|
||||
If 'warm_start' is True, the solution of the last fitting is used as
|
||||
initialization for the next call of fit(). This can speed up
|
||||
convergence when fit is called several times on similar problems.
|
||||
In that case, 'n_init' is ignored and only a single initialization
|
||||
occurs upon the first call.
|
||||
See :term:`the Glossary <warm_start>`.
|
||||
|
||||
verbose : int, default=0
|
||||
Enable verbose output. If 1 then it prints the current
|
||||
initialization and each iteration step. If greater than 1 then
|
||||
it prints also the log probability and the time needed
|
||||
for each step.
|
||||
|
||||
verbose_interval : int, default=10
|
||||
Number of iteration done before the next print.
|
||||
|
||||
Attributes
|
||||
----------
|
||||
weights_ : array-like of shape (n_components,)
|
||||
The weights of each mixture components.
|
||||
|
||||
means_ : array-like of shape (n_components, n_features)
|
||||
The mean of each mixture component.
|
||||
|
||||
covariances_ : array-like
|
||||
The covariance of each mixture component.
|
||||
The shape depends on `covariance_type`::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
For an example of using covariances, refer to
|
||||
:ref:`sphx_glr_auto_examples_mixture_plot_gmm_covariances.py`.
|
||||
|
||||
precisions_ : array-like
|
||||
The precision matrices for each component in the mixture. A precision
|
||||
matrix is the inverse of a covariance matrix. A covariance matrix is
|
||||
symmetric positive definite so the mixture of Gaussian can be
|
||||
equivalently parameterized by the precision matrices. Storing the
|
||||
precision matrices instead of the covariance matrices makes it more
|
||||
efficient to compute the log-likelihood of new samples at test time.
|
||||
The shape depends on `covariance_type`::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
precisions_cholesky_ : array-like
|
||||
The cholesky decomposition of the precision matrices of each mixture
|
||||
component. A precision matrix is the inverse of a covariance matrix.
|
||||
A covariance matrix is symmetric positive definite so the mixture of
|
||||
Gaussian can be equivalently parameterized by the precision matrices.
|
||||
Storing the precision matrices instead of the covariance matrices makes
|
||||
it more efficient to compute the log-likelihood of new samples at test
|
||||
time. The shape depends on `covariance_type`::
|
||||
|
||||
(n_components,) if 'spherical',
|
||||
(n_features, n_features) if 'tied',
|
||||
(n_components, n_features) if 'diag',
|
||||
(n_components, n_features, n_features) if 'full'
|
||||
|
||||
converged_ : bool
|
||||
True when convergence of the best fit of EM was reached, False otherwise.
|
||||
|
||||
n_iter_ : int
|
||||
Number of step used by the best fit of EM to reach the convergence.
|
||||
|
||||
lower_bound_ : float
|
||||
Lower bound value on the log-likelihood (of the training data with
|
||||
respect to the model) of the best fit of EM.
|
||||
|
||||
lower_bounds_ : array-like of shape (`n_iter_`,)
|
||||
The list of lower bound values on the log-likelihood from each
|
||||
iteration of the best fit of EM.
|
||||
|
||||
n_features_in_ : int
|
||||
Number of features seen during :term:`fit`.
|
||||
|
||||
.. versionadded:: 0.24
|
||||
|
||||
feature_names_in_ : ndarray of shape (`n_features_in_`,)
|
||||
Names of features seen during :term:`fit`. Defined only when `X`
|
||||
has feature names that are all strings.
|
||||
|
||||
.. versionadded:: 1.0
|
||||
|
||||
See Also
|
||||
--------
|
||||
BayesianGaussianMixture : Gaussian mixture model fit with a variational
|
||||
inference.
|
||||
|
||||
Examples
|
||||
--------
|
||||
>>> import numpy as np
|
||||
>>> from sklearn.mixture import GaussianMixture
|
||||
>>> X = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
|
||||
>>> gm = GaussianMixture(n_components=2, random_state=0).fit(X)
|
||||
>>> gm.means_
|
||||
array([[10., 2.],
|
||||
[ 1., 2.]])
|
||||
>>> gm.predict([[0, 0], [12, 3]])
|
||||
array([1, 0])
|
||||
|
||||
For a comparison of Gaussian Mixture with other clustering algorithms, see
|
||||
:ref:`sphx_glr_auto_examples_cluster_plot_cluster_comparison.py`
|
||||
"""
|
||||
|
||||
_parameter_constraints: dict = {
|
||||
**BaseMixture._parameter_constraints,
|
||||
"covariance_type": [StrOptions({"full", "tied", "diag", "spherical"})],
|
||||
"weights_init": ["array-like", None],
|
||||
"means_init": ["array-like", None],
|
||||
"precisions_init": ["array-like", None],
|
||||
}
|
||||
|
||||
def __init__(
    self,
    n_components=1,
    *,
    covariance_type="full",
    tol=1e-3,
    reg_covar=1e-6,
    max_iter=100,
    n_init=1,
    init_params="kmeans",
    weights_init=None,
    means_init=None,
    precisions_init=None,
    random_state=None,
    warm_start=False,
    verbose=0,
    verbose_interval=10,
):
    # These settings are forwarded unchanged to the base-class constructor.
    base_settings = dict(
        n_components=n_components,
        tol=tol,
        reg_covar=reg_covar,
        max_iter=max_iter,
        n_init=n_init,
        init_params=init_params,
        random_state=random_state,
        warm_start=warm_start,
        verbose=verbose,
        verbose_interval=verbose_interval,
    )
    super().__init__(**base_settings)

    # Settings kept on this class; they are stored as given, without validation.
    self.covariance_type = covariance_type
    self.weights_init = weights_init
    self.means_init = means_init
    self.precisions_init = precisions_init
|
||||
|
||||
def _check_parameters(self, X):
    """Check that the user-provided initial parameters are well defined.

    Each of `weights_init`, `means_init` and `precisions_init` that is not
    None is passed through its checking helper, and the value returned by the
    helper is stored back on the estimator.
    """
    _, n_features = X.shape
    n_components = self.n_components

    weights_init = self.weights_init
    if weights_init is not None:
        self.weights_init = _check_weights(weights_init, n_components)

    means_init = self.means_init
    if means_init is not None:
        self.means_init = _check_means(means_init, n_components, n_features)

    precisions_init = self.precisions_init
    if precisions_init is not None:
        self.precisions_init = _check_precisions(
            precisions_init, self.covariance_type, n_components, n_features
        )
|
||||
|
||||
def _initialize_parameters(self, X, random_state):
    """Initialize the model, skipping the base-class step when possible.

    When `weights_init`, `means_init` and `precisions_init` are all provided
    there is nothing left to estimate, so `_initialize` is called directly
    with `resp=None`. Otherwise the base-class initialization is run.
    """
    everything_provided = (
        self.weights_init is not None
        and self.means_init is not None
        and self.precisions_init is not None
    )
    if everything_provided:
        self._initialize(X, None)
        return

    super()._initialize_parameters(X, random_state)
|
||||
|
||||
def _initialize(self, X, resp):
    """Initialization of the Gaussian mixture parameters.

    User-provided `weights_init`, `means_init` and `precisions_init` take
    precedence over the values estimated from `resp`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    resp : array-like of shape (n_samples, n_components) or None
        When None, no parameter is estimated from the data; the caller
        `_initialize_parameters` only passes None when every initial
        parameter was provided by the user.
    """
    n_samples, _ = X.shape
    weights_given = self.weights_init is not None

    if resp is None:
        weights = means = covariances = None
    else:
        weights, means, covariances = _estimate_gaussian_parameters(
            X, resp, self.reg_covar, self.covariance_type
        )
        if not weights_given:
            # Normalize the estimated weights by the number of samples.
            weights /= n_samples

    self.weights_ = self.weights_init if weights_given else weights
    self.means_ = self.means_init if self.means_init is not None else means

    if self.precisions_init is not None:
        # Only the Cholesky factors are set here; `covariances_` is not.
        self.precisions_cholesky_ = _compute_precision_cholesky_from_precisions(
            self.precisions_init, self.covariance_type
        )
        return

    self.covariances_ = covariances
    self.precisions_cholesky_ = _compute_precision_cholesky(
        covariances, self.covariance_type
    )
|
||||
|
||||
def _m_step(self, X, log_resp):
    """M step.

    Re-estimate the weights, means and covariances from the responsibilities,
    then refresh the Cholesky factors of the precisions.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)

    log_resp : array-like of shape (n_samples, n_components)
        Logarithm of the posterior probabilities (or responsibilities) of
        the point of each sample in X.
    """
    resp = np.exp(log_resp)
    estimates = _estimate_gaussian_parameters(
        X, resp, self.reg_covar, self.covariance_type
    )
    self.weights_, self.means_, self.covariances_ = estimates

    # Rescale in place so that the mixture weights sum to one.
    self.weights_ /= self.weights_.sum()

    self.precisions_cholesky_ = _compute_precision_cholesky(
        self.covariances_, self.covariance_type
    )
|
||||
|
||||
def _estimate_log_prob(self, X):
    """Return the per-component log Gaussian probability of each sample in X."""
    means = self.means_
    precisions_chol = self.precisions_cholesky_
    return _estimate_log_gaussian_prob(
        X, means, precisions_chol, self.covariance_type
    )
|
||||
|
||||
def _estimate_log_weights(self):
|
||||
return np.log(self.weights_)
|
||||
|
||||
def _compute_lower_bound(self, _, log_prob_norm):
    """Return `log_prob_norm` unchanged as the lower bound.

    The first positional argument is ignored.
    """
    return log_prob_norm
|
||||
|
||||
def _get_parameters(self):
|
||||
return (
|
||||
self.weights_,
|
||||
self.means_,
|
||||
self.covariances_,
|
||||
self.precisions_cholesky_,
|
||||
)
|
||||
|
||||
def _set_parameters(self, params):
|
||||
(
|
||||
self.weights_,
|
||||
self.means_,
|
||||
self.covariances_,
|
||||
self.precisions_cholesky_,
|
||||
) = params
|
||||
|
||||
# Attributes computation
|
||||
_, n_features = self.means_.shape
|
||||
|
||||
dtype = self.precisions_cholesky_.dtype
|
||||
if self.covariance_type == "full":
|
||||
self.precisions_ = np.empty_like(self.precisions_cholesky_)
|
||||
for k, prec_chol in enumerate(self.precisions_cholesky_):
|
||||
self.precisions_[k] = np.dot(prec_chol, prec_chol.T)
|
||||
|
||||
elif self.covariance_type == "tied":
|
||||
self.precisions_ = np.dot(
|
||||
self.precisions_cholesky_, self.precisions_cholesky_.T
|
||||
)
|
||||
else:
|
||||
self.precisions_ = self.precisions_cholesky_**2
|
||||
|
||||
def _n_parameters(self):
|
||||
"""Return the number of free parameters in the model."""
|
||||
_, n_features = self.means_.shape
|
||||
if self.covariance_type == "full":
|
||||
cov_params = self.n_components * n_features * (n_features + 1) / 2.0
|
||||
elif self.covariance_type == "diag":
|
||||
cov_params = self.n_components * n_features
|
||||
elif self.covariance_type == "tied":
|
||||
cov_params = n_features * (n_features + 1) / 2.0
|
||||
elif self.covariance_type == "spherical":
|
||||
cov_params = self.n_components
|
||||
mean_params = n_features * self.n_components
|
||||
return int(cov_params + mean_params + self.n_components - 1)
|
||||
|
||||
def bic(self, X):
    """Bayesian information criterion for the current model on the input X.

    You can refer to this :ref:`mathematical section <aic_bic>` for more
    details regarding the formulation of the BIC used.

    For an example of GMM selection using `bic` information criterion,
    refer to :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py`.

    Parameters
    ----------
    X : array of shape (n_samples, n_dimensions)
        The input samples.

    Returns
    -------
    bic : float
        The lower the better.
    """
    score = self.score(X)
    n_samples = X.shape[0]
    # The score is multiplied by n_samples to obtain a total over all samples.
    goodness_of_fit = -2 * score * n_samples
    complexity_penalty = self._n_parameters() * np.log(n_samples)
    return goodness_of_fit + complexity_penalty
|
||||
|
||||
def aic(self, X):
    """Akaike information criterion for the current model on the input X.

    You can refer to this :ref:`mathematical section <aic_bic>` for more
    details regarding the formulation of the AIC used.

    Parameters
    ----------
    X : array of shape (n_samples, n_dimensions)
        The input samples.

    Returns
    -------
    aic : float
        The lower the better.
    """
    score = self.score(X)
    n_samples = X.shape[0]
    # The score is multiplied by n_samples to obtain a total over all samples.
    goodness_of_fit = -2 * score * n_samples
    complexity_penalty = 2 * self._n_parameters()
    return goodness_of_fit + complexity_penalty
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,464 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy.special import gammaln
|
||||
|
||||
from sklearn.exceptions import NotFittedError
|
||||
from sklearn.metrics.cluster import adjusted_rand_score
|
||||
from sklearn.mixture import BayesianGaussianMixture
|
||||
from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm
|
||||
from sklearn.mixture.tests.test_gaussian_mixture import RandomData
|
||||
from sklearn.utils._testing import (
|
||||
assert_almost_equal,
|
||||
assert_array_equal,
|
||||
)
|
||||
|
||||
COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"]
|
||||
PRIOR_TYPE = ["dirichlet_process", "dirichlet_distribution"]
|
||||
|
||||
|
||||
def test_log_dirichlet_norm():
    """Compare `_log_dirichlet_norm` with a direct gammaln-based evaluation."""
    rng = np.random.RandomState(0)
    weight_concentration = rng.rand(2)

    # Reference value: gammaln of the sum minus the sum of the gammaln values.
    log_gamma_of_sum = gammaln(np.sum(weight_concentration))
    sum_of_log_gammas = np.sum(gammaln(weight_concentration))
    expected_norm = log_gamma_of_sum - sum_of_log_gammas

    predicted_norm = _log_dirichlet_norm(weight_concentration)

    assert_almost_equal(expected_norm, predicted_norm)
|
||||
|
||||
|
||||
def test_log_wishart_norm():
    """Compare `_log_wishart_norm` with a per-component reference computation."""
    rng = np.random.RandomState(0)

    n_components, n_features = 5, 2
    degrees_of_freedom = np.abs(rng.rand(n_components)) + 1.0
    log_det_precisions_chol = n_features * np.log(range(2, 2 + n_components))

    # Size the reference array from n_components rather than a literal, so the
    # test stays consistent if the number of components is changed above.
    expected_norm = np.empty(n_components)
    for k, (degrees_of_freedom_k, log_det_k) in enumerate(
        zip(degrees_of_freedom, log_det_precisions_chol)
    ):
        expected_norm[k] = -(
            degrees_of_freedom_k * (log_det_k + 0.5 * n_features * np.log(2.0))
            + np.sum(
                gammaln(
                    0.5
                    * (degrees_of_freedom_k - np.arange(0, n_features)[:, np.newaxis])
                ),
                0,
            )
        ).item()
    predicted_norm = _log_wishart_norm(
        degrees_of_freedom, log_det_precisions_chol, n_features
    )

    assert_almost_equal(expected_norm, predicted_norm)
|
||||
|
||||
|
||||
def test_bayesian_mixture_weights_prior_initialisation():
    """Check how `weight_concentration_prior_` is set during fit."""
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 5, 2
    X = rng.rand(n_samples, n_features)

    # A user-provided weight_concentration_prior must be kept as is.
    weight_concentration_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior=weight_concentration_prior, random_state=rng
    )
    bgmm.fit(X)
    assert_almost_equal(weight_concentration_prior, bgmm.weight_concentration_prior_)

    # Without a user value, the prior is expected to be 1 / n_components.
    bgmm = BayesianGaussianMixture(n_components=n_components, random_state=rng)
    bgmm.fit(X)
    expected_default = 1.0 / n_components
    assert_almost_equal(expected_default, bgmm.weight_concentration_prior_)
|
||||
|
||||
|
||||
def test_bayesian_mixture_mean_prior_initialisation():
    """Check how `mean_precision_prior_` and `mean_prior_` are set during fit."""
    rng = np.random.RandomState(0)
    n_samples, n_components, n_features = 10, 3, 2
    X = rng.rand(n_samples, n_features)

    # A user-provided mean_precision_prior must be kept as is.
    mean_precision_prior = rng.rand()
    bgmm = BayesianGaussianMixture(
        mean_precision_prior=mean_precision_prior, random_state=rng
    )
    bgmm.fit(X)
    assert_almost_equal(mean_precision_prior, bgmm.mean_precision_prior_)

    # Without a user value, mean_precision_prior_ is expected to be 1.
    bgmm = BayesianGaussianMixture(random_state=rng)
    bgmm.fit(X)
    assert_almost_equal(1.0, bgmm.mean_precision_prior_)

    # A user-provided mean_prior must be kept as is.
    mean_prior = rng.rand(n_features)
    bgmm = BayesianGaussianMixture(
        n_components=n_components, mean_prior=mean_prior, random_state=rng
    )
    bgmm.fit(X)
    assert_almost_equal(mean_prior, bgmm.mean_prior_)

    # Without a user value, mean_prior_ is expected to be the mean of X.
    bgmm = BayesianGaussianMixture(n_components=n_components, random_state=rng)
    bgmm.fit(X)
    assert_almost_equal(X.mean(axis=0), bgmm.mean_prior_)
|
||||
|
||||
|
||||
def test_bayesian_mixture_precisions_prior_initialisation():
    """Check how the degrees-of-freedom and covariance priors are set during fit."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2
    X = rng.rand(n_samples, n_features)

    # Check raise message for a bad value of degrees_of_freedom_prior
    bad_degrees_of_freedom_prior_ = n_features - 1.0
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=bad_degrees_of_freedom_prior_, random_state=rng
    )
    msg = (
        "The parameter 'degrees_of_freedom_prior' should be greater than"
        f" {n_features - 1}, but got {bad_degrees_of_freedom_prior_:.3f}."
    )
    with pytest.raises(ValueError, match=msg):
        bgmm.fit(X)

    # Check correct init for a given value of degrees_of_freedom_prior
    degrees_of_freedom_prior = rng.rand() + n_features - 1.0
    bgmm = BayesianGaussianMixture(
        degrees_of_freedom_prior=degrees_of_freedom_prior, random_state=rng
    ).fit(X)
    assert_almost_equal(degrees_of_freedom_prior, bgmm.degrees_of_freedom_prior_)

    # Check correct init for the default value of degrees_of_freedom_prior.
    # The parameter is deliberately NOT passed, so the default path is the one
    # under test; the expected resulting value is n_features.
    degrees_of_freedom_prior_default = n_features
    bgmm = BayesianGaussianMixture(random_state=rng).fit(X)
    assert_almost_equal(
        degrees_of_freedom_prior_default, bgmm.degrees_of_freedom_prior_
    )

    # Check correct init for a given value of covariance_prior
    covariance_prior = {
        "full": np.cov(X.T, bias=1) + 10,
        "tied": np.cov(X.T, bias=1) + 5,
        "diag": np.diag(np.atleast_2d(np.cov(X.T, bias=1))) + 3,
        "spherical": rng.rand(),
    }

    bgmm = BayesianGaussianMixture(random_state=rng)
    for cov_type in COVARIANCE_TYPE:
        bgmm.covariance_type = cov_type
        bgmm.covariance_prior = covariance_prior[cov_type]
        bgmm.fit(X)
        assert_almost_equal(covariance_prior[cov_type], bgmm.covariance_prior_)

    # Check correct init for the default value of covariance_prior
    covariance_prior_default = {
        "full": np.atleast_2d(np.cov(X.T)),
        "tied": np.atleast_2d(np.cov(X.T)),
        "diag": np.var(X, axis=0, ddof=1),
        "spherical": np.var(X, axis=0, ddof=1).mean(),
    }

    bgmm = BayesianGaussianMixture(random_state=0)
    for cov_type in COVARIANCE_TYPE:
        bgmm.covariance_type = cov_type
        bgmm.fit(X)
        assert_almost_equal(covariance_prior_default[cov_type], bgmm.covariance_prior_)
|
||||
|
||||
|
||||
def test_bayesian_mixture_check_is_fitted():
    """Calling `score` on an unfitted estimator must raise with a clear message."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2

    unfitted = BayesianGaussianMixture(random_state=rng)
    X = rng.rand(n_samples, n_features)

    # Check raise message
    msg = "This BayesianGaussianMixture instance is not fitted yet."
    with pytest.raises(ValueError, match=msg):
        unfitted.score(X)
|
||||
|
||||
|
||||
def test_bayesian_mixture_weights():
    """Check `weights_` against values recomputed from `weight_concentration_`."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 2

    X = rng.rand(n_samples, n_features)

    # Case Dirichlet distribution for the weight concentration prior type:
    # the weights are the normalized concentrations.
    bgmm = BayesianGaussianMixture(
        weight_concentration_prior_type="dirichlet_distribution",
        n_components=3,
        random_state=rng,
    )
    bgmm.fit(X)

    concentration = bgmm.weight_concentration_
    expected_weights = concentration / np.sum(concentration)
    assert_almost_equal(expected_weights, bgmm.weights_)
    assert_almost_equal(np.sum(bgmm.weights_), 1.0)

    # Case Dirichlet process for the weight concentration prior type:
    # the weights are rebuilt from the two concentration arrays.
    dpgmm = BayesianGaussianMixture(
        weight_concentration_prior_type="dirichlet_process",
        n_components=3,
        random_state=rng,
    )
    dpgmm.fit(X)

    first = dpgmm.weight_concentration_[0]
    second = dpgmm.weight_concentration_[1]
    weight_dirichlet_sum = first + second
    remaining_fraction = second / weight_dirichlet_sum
    expected_weights = (
        first
        / weight_dirichlet_sum
        * np.hstack((1, np.cumprod(remaining_fraction[:-1])))
    )
    expected_weights /= np.sum(expected_weights)
    assert_almost_equal(expected_weights, dpgmm.weights_)
    assert_almost_equal(np.sum(dpgmm.weights_), 1.0)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_monotonic_likelihood():
    """The lower bound must never decrease between successive fit iterations."""
    # We check that each step of variational inference without regularization
    # monotonically improves the lower bound on the training set.
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=20)
    n_components = rand_data.n_components
    max_fit_calls = 600

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            # warm_start with max_iter=1 makes every fit() call perform a
            # single additional iteration from the previous state.
            bgmm = BayesianGaussianMixture(
                weight_concentration_prior_type=prior_type,
                n_components=2 * n_components,
                covariance_type=covar_type,
                warm_start=True,
                max_iter=1,
                random_state=rng,
                tol=1e-3,
            )

            # Do one training iteration at a time so we can make sure that the
            # training log likelihood increases after each iteration.
            current_lower_bound = -np.inf
            for _ in range(max_fit_calls):
                prev_lower_bound = current_lower_bound
                bgmm.fit(X)
                current_lower_bound = bgmm.lower_bound_
                assert current_lower_bound >= prev_lower_bound

                if bgmm.converged_:
                    break
            assert bgmm.converged_
|
||||
|
||||
|
||||
def test_compare_covar_type():
    """Compare the 'full' covariances with every other covariance type.

    The comparison is made on the parameters produced by
    ``_initialize_parameters`` (which applies one iteration of the M-step),
    after rescaling the covariances by the degrees of freedom.
    """
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    X = rand_data.X["full"]
    n_components = rand_data.n_components

    def _initialized_model(prior_type, covariance_type):
        # Build a model and run only the parameter check and initialization,
        # always seeded identically so that the types are comparable.
        model = BayesianGaussianMixture(
            weight_concentration_prior_type=prior_type,
            n_components=2 * n_components,
            covariance_type=covariance_type,
            max_iter=1,
            random_state=0,
            tol=1e-7,
        )
        model._check_parameters(X)
        model._initialize_parameters(X, np.random.RandomState(0))
        return model

    for prior_type in PRIOR_TYPE:
        # Reference: the full covariance matrices.
        full = _initialized_model(prior_type, "full")
        full_covariances = (
            full.covariances_ * full.degrees_of_freedom_[:, np.newaxis, np.newaxis]
        )

        # tied covariance == mean of the full covariances over axis 0
        tied = _initialized_model(prior_type, "tied")
        tied_covariance = tied.covariances_ * tied.degrees_of_freedom_
        assert_almost_equal(tied_covariance, np.mean(full_covariances, 0))

        # diag covariances == diagonal of each full covariance
        diag = _initialized_model(prior_type, "diag")
        diag_covariances = diag.covariances_ * diag.degrees_of_freedom_[:, np.newaxis]
        expected_diag = np.array([np.diag(cov) for cov in full_covariances])
        assert_almost_equal(diag_covariances, expected_diag)

        # spherical covariances == mean of the diag covariances over axis 1
        spherical = _initialized_model(prior_type, "spherical")
        spherical_covariances = spherical.covariances_ * spherical.degrees_of_freedom_
        assert_almost_equal(spherical_covariances, np.mean(diag_covariances, 1))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning")
def test_check_covariance_precision():
    """Check that covariance times precision gives the identity.

    For 'full' and 'tied' the matrix product must equal the identity matrix;
    for 'diag' and 'spherical' the element-wise product must be all ones.
    """
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=7)
    n_components, n_features = 2 * rand_data.n_components, 2
    identity = np.eye(n_features)

    # A single estimator is reused; only its covariance type changes.
    bgmm = BayesianGaussianMixture(
        n_components=n_components, max_iter=100, random_state=rng, tol=1e-3, reg_covar=0
    )
    for covar_type in COVARIANCE_TYPE:
        bgmm.covariance_type = covar_type
        bgmm.fit(rand_data.X[covar_type])
        covariances, precisions = bgmm.covariances_, bgmm.precisions_

        if covar_type == "full":
            # One matrix pair per component.
            for covar, precision in zip(covariances, precisions):
                assert_almost_equal(np.dot(covar, precision), identity)
        elif covar_type == "tied":
            # A single matrix pair shared by all components.
            assert_almost_equal(np.dot(covariances, precisions), identity)
        elif covar_type == "diag":
            assert_almost_equal(
                covariances * precisions, np.ones((n_components, n_features))
            )
        else:
            assert_almost_equal(covariances * precisions, np.ones(n_components))
|
||||
|
||||
|
||||
def test_invariant_translation():
    """Check that shifting the data by a constant only shifts the means.

    Two identically configured models are fitted on ``X`` and ``X + 100``;
    the means must differ by exactly that offset while the weights and the
    covariances must agree.
    """
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng, scale=100)
    n_components = 2 * rand_data.n_components

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            # Both models share the exact same configuration and seed.
            params = dict(
                weight_concentration_prior_type=prior_type,
                n_components=n_components,
                max_iter=100,
                random_state=0,
                tol=1e-3,
                reg_covar=0,
            )
            bgmm1 = BayesianGaussianMixture(**params).fit(X)
            bgmm2 = BayesianGaussianMixture(**params).fit(X + 100)

            assert_almost_equal(bgmm1.means_, bgmm2.means_ - 100)
            assert_almost_equal(bgmm1.weights_, bgmm2.weights_)
            assert_almost_equal(bgmm1.covariances_, bgmm2.covariances_)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:.*did not converge.*")
@pytest.mark.parametrize(
    "seed, max_iter, tol",
    [
        (0, 2, 1e-7),  # strict non-convergence
        (1, 2, 1e-1),  # loose non-convergence
        (3, 300, 1e-7),  # strict convergence
        (4, 300, 1e-1),  # loose convergence
    ],
)
def test_bayesian_mixture_fit_predict(seed, max_iter, tol):
    """Check that ``fit(X).predict(X)`` and ``fit_predict(X)`` agree.

    The check is repeated for every covariance type, on two estimators that
    start from the same state (the second is a deep copy of the first).
    """
    rng = np.random.RandomState(seed)
    rand_data = RandomData(rng, n_samples=50, scale=7)
    n_components = 2 * rand_data.n_components

    for covar_type in COVARIANCE_TYPE:
        reference = BayesianGaussianMixture(
            n_components=n_components,
            max_iter=max_iter,
            random_state=rng,
            tol=tol,
            reg_covar=0,
        )
        reference.covariance_type = covar_type
        # Copy before fitting so that both estimators see the same RNG state.
        duplicate = copy.deepcopy(reference)
        X = rand_data.X[covar_type]

        labels_fit_then_predict = reference.fit(X).predict(X)
        labels_fit_predict = duplicate.fit_predict(X)
        assert_array_equal(labels_fit_then_predict, labels_fit_predict)
|
||||
|
||||
|
||||
def test_bayesian_mixture_fit_predict_n_init():
    """Check that ``fit_predict`` matches a later ``predict`` when n_init > 1."""
    X = np.random.RandomState(0).randn(50, 5)
    model = BayesianGaussianMixture(n_components=5, n_init=10, random_state=0)

    labels_from_fit_predict = model.fit_predict(X)
    labels_from_predict = model.predict(X)

    assert_array_equal(labels_from_fit_predict, labels_from_predict)
|
||||
|
||||
|
||||
def test_bayesian_mixture_predict_predict_proba():
    """Check ``predict`` against the argmax of ``predict_proba``.

    Mirrors test_gaussian_mixture_predict_predict_proba(): predicting before
    fitting must raise ``NotFittedError``; after fitting, the labels must
    match the most probable component and reach an adjusted Rand score of at
    least 0.95 against the true labels.
    """
    rng = np.random.RandomState(0)
    rand_data = RandomData(rng)
    Y = rand_data.Y
    not_fitted_msg = (
        "This BayesianGaussianMixture instance is not fitted yet. "
        "Call 'fit' with appropriate arguments before using this estimator."
    )

    for prior_type in PRIOR_TYPE:
        for covar_type in COVARIANCE_TYPE:
            X = rand_data.X[covar_type]
            bgmm = BayesianGaussianMixture(
                n_components=rand_data.n_components,
                random_state=rng,
                weight_concentration_prior_type=prior_type,
                covariance_type=covar_type,
            )

            # Predicting on an unfitted estimator must raise an error.
            with pytest.raises(NotFittedError, match=not_fitted_msg):
                bgmm.predict(X)

            bgmm.fit(X)
            Y_pred = bgmm.predict(X)
            most_probable = bgmm.predict_proba(X).argmax(axis=1)
            assert_array_equal(Y_pred, most_probable)
            assert adjusted_rand_score(Y, Y_pred) >= 0.95
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,30 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.mixture import BayesianGaussianMixture, GaussianMixture
|
||||
|
||||
|
||||
@pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
def test_gaussian_mixture_n_iter(estimator):
    """Check that ``n_iter_`` equals the number of iterations performed."""
    max_iter = 1
    X = np.random.RandomState(0).rand(10, 5)

    estimator.set_params(max_iter=max_iter)
    estimator.fit(X)

    assert estimator.n_iter_ == max_iter
|
||||
|
||||
|
||||
@pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()])
def test_mixture_n_components_greater_than_n_samples_error(estimator):
    """Check that an error is raised when n_components > n_samples.

    The data has 10 samples while the estimator is configured with 12
    components, so ``fit`` must raise a ``ValueError`` whose message contains
    "Expected n_samples >= n_components".
    """
    rng = np.random.RandomState(0)
    X = rng.rand(10, 5)
    estimator.set_params(n_components=12)

    msg = "Expected n_samples >= n_components"
    with pytest.raises(ValueError, match=msg):
        estimator.fit(X)
|
||||
Reference in New Issue
Block a user