add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,5 @@
"""This module contains least-squares algorithms."""
from .least_squares import least_squares
from .lsq_linear import lsq_linear
__all__ = ['least_squares', 'lsq_linear']

View File

@@ -0,0 +1,183 @@
"""Bounded-variable least-squares algorithm."""
import numpy as np
from numpy.linalg import norm, lstsq
from scipy.optimize import OptimizeResult
from .common import print_header_linear, print_iteration_linear
def compute_kkt_optimality(g, on_bound):
"""Compute the maximum violation of KKT conditions."""
g_kkt = g * on_bound
free_set = on_bound == 0
g_kkt[free_set] = np.abs(g[free_set])
return np.max(g_kkt)
def bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose, rcond=None):
m, n = A.shape
x = x_lsq.copy()
on_bound = np.zeros(n)
mask = x <= lb
x[mask] = lb[mask]
on_bound[mask] = -1
mask = x >= ub
x[mask] = ub[mask]
on_bound[mask] = 1
free_set = on_bound == 0
active_set = ~free_set
free_set, = np.nonzero(free_set)
r = A.dot(x) - b
cost = 0.5 * np.dot(r, r)
initial_cost = cost
g = A.T.dot(r)
cost_change = None
step_norm = None
iteration = 0
if verbose == 2:
print_header_linear()
# This is the initialization loop. The requirement is that the
# least-squares solution on free variables is feasible before BVLS starts.
# One possible initialization is to set all variables to lower or upper
# bounds, but many iterations may be required from this state later on.
# The implemented ad-hoc procedure which intuitively should give a better
# initial state: find the least-squares solution on current free variables,
# if its feasible then stop, otherwise, set violating variables to
# corresponding bounds and continue on the reduced set of free variables.
while free_set.size > 0:
if verbose == 2:
optimality = compute_kkt_optimality(g, on_bound)
print_iteration_linear(iteration, cost, cost_change, step_norm,
optimality)
iteration += 1
x_free_old = x[free_set].copy()
A_free = A[:, free_set]
b_free = b - A.dot(x * active_set)
z = lstsq(A_free, b_free, rcond=rcond)[0]
lbv = z < lb[free_set]
ubv = z > ub[free_set]
v = lbv | ubv
if np.any(lbv):
ind = free_set[lbv]
x[ind] = lb[ind]
active_set[ind] = True
on_bound[ind] = -1
if np.any(ubv):
ind = free_set[ubv]
x[ind] = ub[ind]
active_set[ind] = True
on_bound[ind] = 1
ind = free_set[~v]
x[ind] = z[~v]
r = A.dot(x) - b
cost_new = 0.5 * np.dot(r, r)
cost_change = cost - cost_new
cost = cost_new
g = A.T.dot(r)
step_norm = norm(x[free_set] - x_free_old)
if np.any(v):
free_set = free_set[~v]
else:
break
if max_iter is None:
max_iter = n
max_iter += iteration
termination_status = None
# Main BVLS loop.
optimality = compute_kkt_optimality(g, on_bound)
for iteration in range(iteration, max_iter): # BVLS Loop A
if verbose == 2:
print_iteration_linear(iteration, cost, cost_change,
step_norm, optimality)
if optimality < tol:
termination_status = 1
if termination_status is not None:
break
move_to_free = np.argmax(g * on_bound)
on_bound[move_to_free] = 0
while True: # BVLS Loop B
free_set = on_bound == 0
active_set = ~free_set
free_set, = np.nonzero(free_set)
x_free = x[free_set]
x_free_old = x_free.copy()
lb_free = lb[free_set]
ub_free = ub[free_set]
A_free = A[:, free_set]
b_free = b - A.dot(x * active_set)
z = lstsq(A_free, b_free, rcond=rcond)[0]
lbv, = np.nonzero(z < lb_free)
ubv, = np.nonzero(z > ub_free)
v = np.hstack((lbv, ubv))
if v.size > 0:
alphas = np.hstack((
lb_free[lbv] - x_free[lbv],
ub_free[ubv] - x_free[ubv])) / (z[v] - x_free[v])
i = np.argmin(alphas)
i_free = v[i]
alpha = alphas[i]
x_free *= 1 - alpha
x_free += alpha * z
x[free_set] = x_free
if i < lbv.size:
on_bound[free_set[i_free]] = -1
else:
on_bound[free_set[i_free]] = 1
else:
x_free = z
x[free_set] = x_free
break
step_norm = norm(x_free - x_free_old)
r = A.dot(x) - b
cost_new = 0.5 * np.dot(r, r)
cost_change = cost - cost_new
if cost_change < tol * cost:
termination_status = 2
cost = cost_new
g = A.T.dot(r)
optimality = compute_kkt_optimality(g, on_bound)
if termination_status is None:
termination_status = 0
return OptimizeResult(
x=x, fun=r, cost=cost, optimality=optimality, active_mask=on_bound,
nit=iteration + 1, status=termination_status,
initial_cost=initial_cost)

View File

@@ -0,0 +1,731 @@
"""Functions used by least-squares algorithms."""
from math import copysign
import numpy as np
from numpy.linalg import norm
from scipy.linalg import cho_factor, cho_solve, LinAlgError
from scipy.sparse.linalg import LinearOperator, aslinearoperator
from scipy._lib._sparse import issparse
EPS = np.finfo(float).eps
# Functions related to a trust-region problem.
def intersect_trust_region(x, s, Delta):
"""Find the intersection of a line with the boundary of a trust region.
This function solves the quadratic equation with respect to t
||(x + s*t)||**2 = Delta**2.
Returns
-------
t_neg, t_pos : tuple of float
Negative and positive roots.
Raises
------
ValueError
If `s` is zero or `x` is not within the trust region.
"""
a = np.dot(s, s)
if a == 0:
raise ValueError("`s` is zero.")
b = np.dot(x, s)
c = np.dot(x, x) - Delta**2
if c > 0:
raise ValueError("`x` is not within the trust region.")
d = np.sqrt(b*b - a*c) # Root from one fourth of the discriminant.
# Computations below avoid loss of significance, see "Numerical Recipes".
q = -(b + copysign(d, b))
t1 = q / a
t2 = c / q
if t1 < t2:
return t1, t2
else:
return t2, t1
def solve_lsq_trust_region(n, m, uf, s, V, Delta, initial_alpha=None,
rtol=0.01, max_iter=10):
"""Solve a trust-region problem arising in least-squares minimization.
This function implements a method described by J. J. More [1]_ and used
in MINPACK, but it relies on a single SVD of Jacobian instead of series
of Cholesky decompositions. Before running this function, compute:
``U, s, VT = svd(J, full_matrices=False)``.
Parameters
----------
n : int
Number of variables.
m : int
Number of residuals.
uf : ndarray
Computed as U.T.dot(f).
s : ndarray
Singular values of J.
V : ndarray
Transpose of VT.
Delta : float
Radius of a trust region.
initial_alpha : float, optional
Initial guess for alpha, which might be available from a previous
iteration. If None, determined automatically.
rtol : float, optional
Stopping tolerance for the root-finding procedure. Namely, the
solution ``p`` will satisfy ``abs(norm(p) - Delta) < rtol * Delta``.
max_iter : int, optional
Maximum allowed number of iterations for the root-finding procedure.
Returns
-------
p : ndarray, shape (n,)
Found solution of a trust-region problem.
alpha : float
Positive value such that (J.T*J + alpha*I)*p = -J.T*f.
Sometimes called Levenberg-Marquardt parameter.
n_iter : int
Number of iterations made by root-finding procedure. Zero means
that Gauss-Newton step was selected as the solution.
References
----------
.. [1] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes
in Mathematics 630, Springer Verlag, pp. 105-116, 1977.
"""
def phi_and_derivative(alpha, suf, s, Delta):
"""Function of which to find zero.
It is defined as "norm of regularized (by alpha) least-squares
solution minus `Delta`". Refer to [1]_.
"""
denom = s**2 + alpha
p_norm = norm(suf / denom)
phi = p_norm - Delta
phi_prime = -np.sum(suf ** 2 / denom**3) / p_norm
return phi, phi_prime
suf = s * uf
# Check if J has full rank and try Gauss-Newton step.
if m >= n:
threshold = EPS * m * s[0]
full_rank = s[-1] > threshold
else:
full_rank = False
if full_rank:
p = -V.dot(uf / s)
if norm(p) <= Delta:
return p, 0.0, 0
alpha_upper = norm(suf) / Delta
if full_rank:
phi, phi_prime = phi_and_derivative(0.0, suf, s, Delta)
alpha_lower = -phi / phi_prime
else:
alpha_lower = 0.0
if initial_alpha is None or not full_rank and initial_alpha == 0:
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
else:
alpha = initial_alpha
for it in range(max_iter):
if alpha < alpha_lower or alpha > alpha_upper:
alpha = max(0.001 * alpha_upper, (alpha_lower * alpha_upper)**0.5)
phi, phi_prime = phi_and_derivative(alpha, suf, s, Delta)
if phi < 0:
alpha_upper = alpha
ratio = phi / phi_prime
alpha_lower = max(alpha_lower, alpha - ratio)
alpha -= (phi + Delta) * ratio / Delta
if np.abs(phi) < rtol * Delta:
break
p = -V.dot(suf / (s**2 + alpha))
# Make the norm of p equal to Delta, p is changed only slightly during
# this. It is done to prevent p lie outside the trust region (which can
# cause problems later).
p *= Delta / norm(p)
return p, alpha, it + 1
def solve_trust_region_2d(B, g, Delta):
"""Solve a general trust-region problem in 2 dimensions.
The problem is reformulated as a 4th order algebraic equation,
the solution of which is found by numpy.roots.
Parameters
----------
B : ndarray, shape (2, 2)
Symmetric matrix, defines a quadratic term of the function.
g : ndarray, shape (2,)
Defines a linear term of the function.
Delta : float
Radius of a trust region.
Returns
-------
p : ndarray, shape (2,)
Found solution.
newton_step : bool
Whether the returned solution is the Newton step which lies within
the trust region.
"""
try:
R, lower = cho_factor(B)
p = -cho_solve((R, lower), g)
if np.dot(p, p) <= Delta**2:
return p, True
except LinAlgError:
pass
a = B[0, 0] * Delta**2
b = B[0, 1] * Delta**2
c = B[1, 1] * Delta**2
d = g[0] * Delta
f = g[1] * Delta
coeffs = np.array(
[-b + d, 2 * (a - c + f), 6 * b, 2 * (-a + c + f), -b - d])
t = np.roots(coeffs) # Can handle leading zeros.
t = np.real(t[np.isreal(t)])
p = Delta * np.vstack((2 * t / (1 + t**2), (1 - t**2) / (1 + t**2)))
value = 0.5 * np.sum(p * B.dot(p), axis=0) + np.dot(g, p)
i = np.argmin(value)
p = p[:, i]
return p, False
def update_tr_radius(Delta, actual_reduction, predicted_reduction,
step_norm, bound_hit):
"""Update the radius of a trust region based on the cost reduction.
Returns
-------
Delta : float
New radius.
ratio : float
Ratio between actual and predicted reductions.
"""
if predicted_reduction > 0:
ratio = actual_reduction / predicted_reduction
elif predicted_reduction == actual_reduction == 0:
ratio = 1
else:
ratio = 0
if ratio < 0.25:
Delta = 0.25 * step_norm
elif ratio > 0.75 and bound_hit:
Delta *= 2.0
return Delta, ratio
# Construction and minimization of quadratic functions.
def build_quadratic_1d(J, g, s, diag=None, s0=None):
"""Parameterize a multivariate quadratic function along a line.
The resulting univariate quadratic function is given as follows::
f(t) = 0.5 * (s0 + s*t).T * (J.T*J + diag) * (s0 + s*t) +
g.T * (s0 + s*t)
Parameters
----------
J : ndarray, sparse array or LinearOperator shape (m, n)
Jacobian matrix, affects the quadratic term.
g : ndarray, shape (n,)
Gradient, defines the linear term.
s : ndarray, shape (n,)
Direction vector of a line.
diag : None or ndarray with shape (n,), optional
Addition diagonal part, affects the quadratic term.
If None, assumed to be 0.
s0 : None or ndarray with shape (n,), optional
Initial point. If None, assumed to be 0.
Returns
-------
a : float
Coefficient for t**2.
b : float
Coefficient for t.
c : float
Free term. Returned only if `s0` is provided.
"""
v = J.dot(s)
a = np.dot(v, v)
if diag is not None:
a += np.dot(s * diag, s)
a *= 0.5
b = np.dot(g, s)
if s0 is not None:
u = J.dot(s0)
b += np.dot(u, v)
c = 0.5 * np.dot(u, u) + np.dot(g, s0)
if diag is not None:
b += np.dot(s0 * diag, s)
c += 0.5 * np.dot(s0 * diag, s0)
return a, b, c
else:
return a, b
def minimize_quadratic_1d(a, b, lb, ub, c=0):
"""Minimize a 1-D quadratic function subject to bounds.
The free term `c` is 0 by default. Bounds must be finite.
Returns
-------
t : float
Minimum point.
y : float
Minimum value.
"""
t = [lb, ub]
if a != 0:
extremum = -0.5 * b / a
if lb < extremum < ub:
t.append(extremum)
t = np.asarray(t)
y = t * (a * t + b) + c
min_index = np.argmin(y)
return t[min_index], y[min_index]
def evaluate_quadratic(J, g, s, diag=None):
"""Compute values of a quadratic function arising in least squares.
The function is 0.5 * s.T * (J.T * J + diag) * s + g.T * s.
Parameters
----------
J : ndarray, sparse array or LinearOperator, shape (m, n)
Jacobian matrix, affects the quadratic term.
g : ndarray, shape (n,)
Gradient, defines the linear term.
s : ndarray, shape (k, n) or (n,)
Array containing steps as rows.
diag : ndarray, shape (n,), optional
Addition diagonal part, affects the quadratic term.
If None, assumed to be 0.
Returns
-------
values : ndarray with shape (k,) or float
Values of the function. If `s` was 2-D, then ndarray is
returned, otherwise, float is returned.
"""
if s.ndim == 1:
Js = J.dot(s)
q = np.dot(Js, Js)
if diag is not None:
q += np.dot(s * diag, s)
else:
Js = J.dot(s.T)
q = np.sum(Js**2, axis=0)
if diag is not None:
q += np.sum(diag * s**2, axis=1)
l = np.dot(s, g)
return 0.5 * q + l
# Utility functions to work with bound constraints.
def in_bounds(x, lb, ub):
"""Check if a point lies within bounds."""
return np.all((x >= lb) & (x <= ub))
def step_size_to_bound(x, s, lb, ub):
"""Compute a min_step size required to reach a bound.
The function computes a positive scalar t, such that x + s * t is on
the bound.
Returns
-------
step : float
Computed step. Non-negative value.
hits : ndarray of int with shape of x
Each element indicates whether a corresponding variable reaches the
bound:
* 0 - the bound was not hit.
* -1 - the lower bound was hit.
* 1 - the upper bound was hit.
"""
non_zero = np.nonzero(s)
s_non_zero = s[non_zero]
steps = np.empty_like(x)
steps.fill(np.inf)
with np.errstate(over='ignore'):
steps[non_zero] = np.maximum((lb - x)[non_zero] / s_non_zero,
(ub - x)[non_zero] / s_non_zero)
min_step = np.min(steps)
return min_step, np.equal(steps, min_step) * np.sign(s).astype(int)
def find_active_constraints(x, lb, ub, rtol=1e-10):
"""Determine which constraints are active in a given point.
The threshold is computed using `rtol` and the absolute value of the
closest bound.
Returns
-------
active : ndarray of int with shape of x
Each component shows whether the corresponding constraint is active:
* 0 - a constraint is not active.
* -1 - a lower bound is active.
* 1 - a upper bound is active.
"""
active = np.zeros_like(x, dtype=int)
if rtol == 0:
active[x <= lb] = -1
active[x >= ub] = 1
return active
lower_dist = x - lb
upper_dist = ub - x
lower_threshold = rtol * np.maximum(1, np.abs(lb))
upper_threshold = rtol * np.maximum(1, np.abs(ub))
lower_active = (np.isfinite(lb) &
(lower_dist <= np.minimum(upper_dist, lower_threshold)))
active[lower_active] = -1
upper_active = (np.isfinite(ub) &
(upper_dist <= np.minimum(lower_dist, upper_threshold)))
active[upper_active] = 1
return active
def make_strictly_feasible(x, lb, ub, rstep=1e-10):
"""Shift a point to the interior of a feasible region.
Each element of the returned vector is at least at a relative distance
`rstep` from the closest bound. If ``rstep=0`` then `np.nextafter` is used.
"""
x_new = x.copy()
active = find_active_constraints(x, lb, ub, rstep)
lower_mask = np.equal(active, -1)
upper_mask = np.equal(active, 1)
if rstep == 0:
x_new[lower_mask] = np.nextafter(lb[lower_mask], ub[lower_mask])
x_new[upper_mask] = np.nextafter(ub[upper_mask], lb[upper_mask])
else:
x_new[lower_mask] = (lb[lower_mask] +
rstep * np.maximum(1, np.abs(lb[lower_mask])))
x_new[upper_mask] = (ub[upper_mask] -
rstep * np.maximum(1, np.abs(ub[upper_mask])))
tight_bounds = (x_new < lb) | (x_new > ub)
x_new[tight_bounds] = 0.5 * (lb[tight_bounds] + ub[tight_bounds])
return x_new
def CL_scaling_vector(x, g, lb, ub):
"""Compute Coleman-Li scaling vector and its derivatives.
Components of a vector v are defined as follows::
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
| 1, otherwise
According to this definition v[i] >= 0 for all i. It differs from the
definition in paper [1]_ (eq. (2.2)), where the absolute value of v is
used. Both definitions are equivalent down the line.
Derivatives of v with respect to x take value 1, -1 or 0 depending on a
case.
Returns
-------
v : ndarray with shape of x
Scaling vector.
dv : ndarray with shape of x
Derivatives of v[i] with respect to x[i], diagonal elements of v's
Jacobian.
References
----------
.. [1] M.A. Branch, T.F. Coleman, and Y. Li, "A Subspace, Interior,
and Conjugate Gradient Method for Large-Scale Bound-Constrained
Minimization Problems," SIAM Journal on Scientific Computing,
Vol. 21, Number 1, pp 1-23, 1999.
"""
v = np.ones_like(x)
dv = np.zeros_like(x)
mask = (g < 0) & np.isfinite(ub)
v[mask] = ub[mask] - x[mask]
dv[mask] = -1
mask = (g > 0) & np.isfinite(lb)
v[mask] = x[mask] - lb[mask]
dv[mask] = 1
return v, dv
def reflective_transformation(y, lb, ub):
"""Compute reflective transformation and its gradient."""
if in_bounds(y, lb, ub):
return y, np.ones_like(y)
lb_finite = np.isfinite(lb)
ub_finite = np.isfinite(ub)
x = y.copy()
g_negative = np.zeros_like(y, dtype=bool)
mask = lb_finite & ~ub_finite
x[mask] = np.maximum(y[mask], 2 * lb[mask] - y[mask])
g_negative[mask] = y[mask] < lb[mask]
mask = ~lb_finite & ub_finite
x[mask] = np.minimum(y[mask], 2 * ub[mask] - y[mask])
g_negative[mask] = y[mask] > ub[mask]
mask = lb_finite & ub_finite
d = ub - lb
t = np.remainder(y[mask] - lb[mask], 2 * d[mask])
x[mask] = lb[mask] + np.minimum(t, 2 * d[mask] - t)
g_negative[mask] = t > d[mask]
g = np.ones_like(y)
g[g_negative] = -1
return x, g
# Functions to display algorithm's progress.
def print_header_nonlinear():
print("{:^15}{:^15}{:^15}{:^15}{:^15}{:^15}"
.format("Iteration", "Total nfev", "Cost", "Cost reduction",
"Step norm", "Optimality"))
def print_iteration_nonlinear(iteration, nfev, cost, cost_reduction,
step_norm, optimality):
if cost_reduction is None:
cost_reduction = " " * 15
else:
cost_reduction = f"{cost_reduction:^15.2e}"
if step_norm is None:
step_norm = " " * 15
else:
step_norm = f"{step_norm:^15.2e}"
print(f"{iteration:^15}{nfev:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}")
def print_header_linear():
print("{:^15}{:^15}{:^15}{:^15}{:^15}"
.format("Iteration", "Cost", "Cost reduction", "Step norm",
"Optimality"))
def print_iteration_linear(iteration, cost, cost_reduction, step_norm,
optimality):
if cost_reduction is None:
cost_reduction = " " * 15
else:
cost_reduction = f"{cost_reduction:^15.2e}"
if step_norm is None:
step_norm = " " * 15
else:
step_norm = f"{step_norm:^15.2e}"
print(f"{iteration:^15}{cost:^15.4e}{cost_reduction}{step_norm}{optimality:^15.2e}")
# Simple helper functions.
def compute_grad(J, f):
"""Compute gradient of the least-squares cost function."""
if isinstance(J, LinearOperator):
return J.rmatvec(f)
else:
return J.T.dot(f)
def compute_jac_scale(J, scale_inv_old=None):
"""Compute variables scale based on the Jacobian matrix."""
if issparse(J):
scale_inv = np.asarray(J.power(2).sum(axis=0)).ravel()**0.5
else:
scale_inv = np.sum(J**2, axis=0)**0.5
if scale_inv_old is None:
scale_inv[scale_inv == 0] = 1
else:
scale_inv = np.maximum(scale_inv, scale_inv_old)
return 1 / scale_inv, scale_inv
def left_multiplied_operator(J, d):
"""Return diag(d) J as LinearOperator."""
J = aslinearoperator(J)
def matvec(x):
return d * J.matvec(x)
def matmat(X):
return d[:, np.newaxis] * J.matmat(X)
def rmatvec(x):
return J.rmatvec(x.ravel() * d)
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
rmatvec=rmatvec)
def right_multiplied_operator(J, d):
"""Return J diag(d) as LinearOperator."""
J = aslinearoperator(J)
def matvec(x):
return J.matvec(np.ravel(x) * d)
def matmat(X):
return J.matmat(X * d[:, np.newaxis])
def rmatvec(x):
return d * J.rmatvec(x)
return LinearOperator(J.shape, matvec=matvec, matmat=matmat,
rmatvec=rmatvec)
def regularized_lsq_operator(J, diag):
"""Return a matrix arising in regularized least squares as LinearOperator.
The matrix is
[ J ]
[ D ]
where D is diagonal matrix with elements from `diag`.
"""
J = aslinearoperator(J)
m, n = J.shape
def matvec(x):
return np.hstack((J.matvec(x), diag * x))
def rmatvec(x):
x1 = x[:m]
x2 = x[m:]
return J.rmatvec(x1) + diag * x2
return LinearOperator((m + n, n), matvec=matvec, rmatvec=rmatvec)
def right_multiply(J, d, copy=True):
"""Compute J diag(d).
If `copy` is False, `J` is modified in place (unless being LinearOperator).
"""
if copy and not isinstance(J, LinearOperator):
J = J.copy()
if issparse(J):
J.data *= d.take(J.indices, mode='clip') # scikit-learn recipe.
elif isinstance(J, LinearOperator):
J = right_multiplied_operator(J, d)
else:
J *= d
return J
def left_multiply(J, d, copy=True):
"""Compute diag(d) J.
If `copy` is False, `J` is modified in place (unless being LinearOperator).
"""
if copy and not isinstance(J, LinearOperator):
J = J.copy()
if issparse(J):
J.data *= np.repeat(d, np.diff(J.indptr)) # scikit-learn recipe.
elif isinstance(J, LinearOperator):
J = left_multiplied_operator(J, d)
else:
J *= d[:, np.newaxis]
return J
def check_termination(dF, F, dx_norm, x_norm, ratio, ftol, xtol):
"""Check termination condition for nonlinear least squares."""
ftol_satisfied = dF < ftol * F and ratio > 0.25
xtol_satisfied = dx_norm < xtol * (xtol + x_norm)
if ftol_satisfied and xtol_satisfied:
return 4
elif ftol_satisfied:
return 2
elif xtol_satisfied:
return 3
else:
return None
def scale_for_robust_loss_function(J, f, rho):
"""Scale Jacobian and residuals for a robust loss function.
Arrays are modified in place.
"""
J_scale = rho[1] + 2 * rho[2] * f**2
J_scale[J_scale < EPS] = EPS
J_scale **= 0.5
f *= rho[1] / J_scale
return left_multiply(J, J_scale, copy=False), f

View File

@@ -0,0 +1,345 @@
"""
Dogleg algorithm with rectangular trust regions for least-squares minimization.
The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to conventional elliptical. The intersection of a trust region and
an initial feasible region is again some rectangle. Thus, on each iteration a
bound-constrained quadratic optimization problem is solved.
A quadratic problem is solved by well-known dogleg approach, where the
function is minimized along piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If Jacobian is not rank-deficient then the function is decreasing
along this path, and optimization amounts to simply following along this
path as long as a point stays within the bounds. A constrained Cauchy step
(along the anti-gradient) is considered for safety in rank deficient cases,
in this situations the convergence might be slow.
If during iterations some variable hit the initial bound and the component
of anti-gradient points outside the feasible region, then a next dogleg step
won't make any progress. At this state such variables satisfy first-order
optimality conditions and they are excluded before computing a next dogleg
step.
Gauss-Newton step can be computed exactly by `numpy.linalg.lstsq` (for dense
Jacobian matrices) or by iterative procedure `scipy.sparse.linalg.lsmr` (for
dense and sparse matrices, or Jacobian being LinearOperator). The second
option allows to solve very large problems (up to couple of millions of
residuals on a regular PC), provided the Jacobian matrix is sufficiently
sparse. But note that dogbox is not very good for solving problems with
large number of constraints, because of variables exclusion-inclusion on each
iteration (a required number of function evaluations might be high or accuracy
of a solution will be poor), thus its large-scale usage is probably limited
to unconstrained problems.
References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
Approach for Unconstrained and Bound Constrained Nonlinear
Optimization", WSEAS International Conference on Applied
Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical optimization, 2nd edition".
"""
import numpy as np
from numpy.linalg import lstsq, norm
from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult
from scipy._lib._util import _call_callback_maybe_halt
from .common import (
step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
build_quadratic_1d, minimize_quadratic_1d, compute_grad,
compute_jac_scale, check_termination, scale_for_robust_loss_function,
print_header_nonlinear, print_iteration_nonlinear)
def lsmr_operator(Jop, d, active_set):
"""Compute LinearOperator to use in LSMR by dogbox algorithm.
`active_set` mask is used to excluded active variables from computations
of matrix-vector products.
"""
m, n = Jop.shape
def matvec(x):
x_free = x.ravel().copy()
x_free[active_set] = 0
return Jop.matvec(x * d)
def rmatvec(x):
r = d * Jop.rmatvec(x)
r[active_set] = 0
return r
return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
def find_intersection(x, tr_bounds, lb, ub):
"""Find intersection of trust-region bounds and initial bounds.
Returns
-------
lb_total, ub_total : ndarray with shape of x
Lower and upper bounds of the intersection region.
orig_l, orig_u : ndarray of bool with shape of x
True means that an original bound is taken as a corresponding bound
in the intersection region.
tr_l, tr_u : ndarray of bool with shape of x
True means that a trust-region bound is taken as a corresponding bound
in the intersection region.
"""
lb_centered = lb - x
ub_centered = ub - x
lb_total = np.maximum(lb_centered, -tr_bounds)
ub_total = np.minimum(ub_centered, tr_bounds)
orig_l = np.equal(lb_total, lb_centered)
orig_u = np.equal(ub_total, ub_centered)
tr_l = np.equal(lb_total, -tr_bounds)
tr_u = np.equal(ub_total, tr_bounds)
return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u
def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
"""Find dogleg step in a rectangular region.
Returns
-------
step : ndarray, shape (n,)
Computed dogleg step.
bound_hits : ndarray of int, shape (n,)
Each component shows whether a corresponding variable hits the
initial bound after the step is taken:
* 0 - a variable doesn't hit the bound.
* -1 - lower bound is hit.
* 1 - upper bound is hit.
tr_hit : bool
Whether the step hit the boundary of the trust-region.
"""
lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
x, tr_bounds, lb, ub
)
bound_hits = np.zeros_like(x, dtype=int)
if in_bounds(newton_step, lb_total, ub_total):
return newton_step, bound_hits, False
to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)
# The classical dogleg algorithm would check if Cauchy step fits into
# the bounds, and just return it constrained version if not. But in a
# rectangular trust region it makes sense to try to improve constrained
# Cauchy step too. Thus, we don't distinguish these two cases.
cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g
step_diff = newton_step - cauchy_step
step_size, hits = step_size_to_bound(cauchy_step, step_diff,
lb_total, ub_total)
bound_hits[(hits < 0) & orig_l] = -1
bound_hits[(hits > 0) & orig_u] = 1
tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)
return cauchy_step + step_size * step_diff, bound_hits, tr_hit
def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
loss_function, tr_solver, tr_options, verbose, callback=None):
f = f0
f_true = f.copy()
nfev = 1
J = J0
njev = 1
if loss_function is not None:
rho = loss_function(f)
cost = 0.5 * np.sum(rho[0])
J, f = scale_for_robust_loss_function(J, f, rho)
else:
cost = 0.5 * np.dot(f, f)
g = compute_grad(J, f)
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
if jac_scale:
scale, scale_inv = compute_jac_scale(J)
else:
scale, scale_inv = x_scale, 1 / x_scale
Delta = norm(x0 * scale_inv, ord=np.inf)
if Delta == 0:
Delta = 1.0
on_bound = np.zeros_like(x0, dtype=int)
on_bound[np.equal(x0, lb)] = -1
on_bound[np.equal(x0, ub)] = 1
x = x0
step = np.empty_like(x0)
if max_nfev is None:
max_nfev = x0.size * 100
termination_status = None
iteration = 0
step_norm = None
actual_reduction = None
if verbose == 2:
print_header_nonlinear()
while True:
active_set = on_bound * g < 0
free_set = ~active_set
g_free = g[free_set]
g_full = g.copy()
g[active_set] = 0
g_norm = norm(g, ord=np.inf)
if g_norm < gtol:
termination_status = 1
if verbose == 2:
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
step_norm, g_norm)
if termination_status is not None or nfev == max_nfev:
break
x_free = x[free_set]
lb_free = lb[free_set]
ub_free = ub[free_set]
scale_free = scale[free_set]
# Compute (Gauss-)Newton and build quadratic model for Cauchy step.
if tr_solver == 'exact':
J_free = J[:, free_set]
newton_step = lstsq(J_free, -f, rcond=-1)[0]
# Coefficients for the quadratic model along the anti-gradient.
a, b = build_quadratic_1d(J_free, g_free, -g_free)
elif tr_solver == 'lsmr':
Jop = aslinearoperator(J)
# We compute lsmr step in scaled variables and then
# transform back to normal variables, if lsmr would give exact lsq
# solution, this would be equivalent to not doing any
# transformations, but from experience it's better this way.
# We pass active_set to make computations as if we selected
# the free subset of J columns, but without actually doing any
# slicing, which is expensive for sparse matrices and impossible
# for LinearOperator.
lsmr_op = lsmr_operator(Jop, scale, active_set)
newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
newton_step *= scale_free
# Components of g for active variables were zeroed, so this call
# is correct and equivalent to using J_free and g_free.
a, b = build_quadratic_1d(Jop, g, -g)
actual_reduction = -1.0
while actual_reduction <= 0 and nfev < max_nfev:
tr_bounds = Delta * scale_free
step_free, on_bound_free, tr_hit = dogleg_step(
x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)
step.fill(0.0)
step[free_set] = step_free
if tr_solver == 'exact':
predicted_reduction = -evaluate_quadratic(J_free, g_free,
step_free)
elif tr_solver == 'lsmr':
predicted_reduction = -evaluate_quadratic(Jop, g, step)
# gh11403 ensure that solution is fully within bounds.
x_new = np.clip(x + step, lb, ub)
f_new = fun(x_new)
nfev += 1
step_h_norm = norm(step * scale_inv, ord=np.inf)
if not np.all(np.isfinite(f_new)):
Delta = 0.25 * step_h_norm
continue
# Usual trust-region step quality estimation.
if loss_function is not None:
cost_new = loss_function(f_new, cost_only=True)
else:
cost_new = 0.5 * np.dot(f_new, f_new)
actual_reduction = cost - cost_new
Delta, ratio = update_tr_radius(
Delta, actual_reduction, predicted_reduction,
step_h_norm, tr_hit
)
step_norm = norm(step)
termination_status = check_termination(
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
if termination_status is not None:
break
if actual_reduction > 0:
on_bound[free_set] = on_bound_free
x = x_new
# Set variables exactly at the boundary.
mask = on_bound == -1
x[mask] = lb[mask]
mask = on_bound == 1
x[mask] = ub[mask]
f = f_new
f_true = f.copy()
cost = cost_new
J = jac(x)
njev += 1
if loss_function is not None:
rho = loss_function(f)
J, f = scale_for_robust_loss_function(J, f, rho)
g = compute_grad(J, f)
if jac_scale:
scale, scale_inv = compute_jac_scale(J, scale_inv)
else:
step_norm = 0
actual_reduction = 0
iteration += 1
# Call callback function and possibly stop optimization
if callback is not None:
intermediate_result = OptimizeResult(
x=x, fun=f, nit=iteration, nfev=nfev)
intermediate_result["cost"] = cost_new
if _call_callback_maybe_halt(
callback, intermediate_result
):
termination_status = -2
break
if termination_status is None:
termination_status = 0
return OptimizeResult(
x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,361 @@
"""Linear least squares with bound constraints on independent variables."""
import numpy as np
from numpy.linalg import norm
from scipy.sparse import issparse, csr_array
from scipy.sparse.linalg import LinearOperator, lsmr
from scipy.optimize import OptimizeResult
from scipy.optimize._minimize import Bounds
from .common import in_bounds, compute_grad
from .trf_linear import trf_linear
from .bvls import bvls
def prepare_bounds(bounds, n):
if len(bounds) != 2:
raise ValueError("`bounds` must contain 2 elements.")
lb, ub = (np.asarray(b, dtype=float) for b in bounds)
if lb.ndim == 0:
lb = np.resize(lb, n)
if ub.ndim == 0:
ub = np.resize(ub, n)
return lb, ub
TERMINATION_MESSAGES = {
-1: "The algorithm was not able to make progress on the last iteration.",
0: "The maximum number of iterations is exceeded.",
1: "The first-order optimality measure is less than `tol`.",
2: "The relative change of the cost function is less than `tol`.",
3: "The unconstrained solution is optimal."
}
def lsq_linear(A, b, bounds=(-np.inf, np.inf), method='trf', tol=1e-10,
lsq_solver=None, lsmr_tol=None, max_iter=None,
verbose=0, *, lsmr_maxiter=None,):
r"""Solve a linear least-squares problem with bounds on the variables.
Given a m-by-n design matrix A and a target vector b with m elements,
`lsq_linear` solves the following optimization problem::
minimize 0.5 * ||A x - b||**2
subject to lb <= x <= ub
This optimization problem is convex, hence a found minimum (if iterations
have converged) is guaranteed to be global.
Parameters
----------
A : array_like, sparse array or LinearOperator, shape (m, n)
Design matrix. Can be `scipy.sparse.linalg.LinearOperator`.
b : array_like, shape (m,)
Target vector.
bounds : 2-tuple of array_like or `Bounds`, optional
Lower and upper bounds on parameters. Defaults to no bounds.
There are two ways to specify the bounds:
- Instance of `Bounds` class.
- 2-tuple of array_like: Each element of the tuple must be either
an array with the length equal to the number of parameters, or a
scalar (in which case the bound is taken to be the same for all
parameters). Use ``np.inf`` with an appropriate sign to disable
bounds on all or some parameters.
method : 'trf' or 'bvls', optional
Method to perform minimization.
* 'trf' : Trust Region Reflective algorithm adapted for a linear
least-squares problem. This is an interior-point-like method
and the required number of iterations is weakly correlated with
the number of variables.
* 'bvls' : Bounded-variable least-squares algorithm. This is
an active set method, which requires the number of iterations
comparable to the number of variables. Can't be used when `A` is
sparse or LinearOperator.
Default is 'trf'.
tol : float, optional
Tolerance parameter. The algorithm terminates if a relative change
of the cost function is less than `tol` on the last iteration.
Additionally, the first-order optimality measure is considered:
* ``method='trf'`` terminates if the uniform norm of the gradient,
scaled to account for the presence of the bounds, is less than
`tol`.
* ``method='bvls'`` terminates if Karush-Kuhn-Tucker conditions
are satisfied within `tol` tolerance.
lsq_solver : {None, 'exact', 'lsmr'}, optional
Method of solving unbounded least-squares problems throughout
iterations:
* 'exact' : Use dense QR or SVD decomposition approach. Can't be
used when `A` is sparse or LinearOperator.
* 'lsmr' : Use `scipy.sparse.linalg.lsmr` iterative procedure
which requires only matrix-vector product evaluations. Can't
be used with ``method='bvls'``.
If None (default), the solver is chosen based on type of `A`.
lsmr_tol : None, float or 'auto', optional
Tolerance parameters 'atol' and 'btol' for `scipy.sparse.linalg.lsmr`
If None (default), it is set to ``1e-2 * tol``. If 'auto', the
tolerance will be adjusted based on the optimality of the current
iterate, which can speed up the optimization process, but is not always
reliable.
max_iter : None or int, optional
Maximum number of iterations before termination. If None (default), it
is set to 100 for ``method='trf'`` or to the number of variables for
``method='bvls'`` (not counting iterations for 'bvls' initialization).
verbose : {0, 1, 2}, optional
Level of algorithm's verbosity:
* 0 : work silently (default).
* 1 : display a termination report.
* 2 : display progress during iterations.
lsmr_maxiter : None or int, optional
Maximum number of iterations for the lsmr least squares solver,
if it is used (by setting ``lsq_solver='lsmr'``). If None (default), it
uses lsmr's default of ``min(m, n)`` where ``m`` and ``n`` are the
number of rows and columns of `A`, respectively. Has no effect if
``lsq_solver='exact'``.
Returns
-------
OptimizeResult with the following fields defined:
x : ndarray, shape (n,)
Solution found.
cost : float
Value of the cost function at the solution.
fun : ndarray, shape (m,)
Vector of residuals at the solution.
optimality : float
First-order optimality measure. The exact meaning depends on `method`,
refer to the description of `tol` parameter.
active_mask : ndarray of int, shape (n,)
Each component shows whether a corresponding constraint is active
(that is, whether a variable is at the bound):
* 0 : a constraint is not active.
* -1 : a lower bound is active.
* 1 : an upper bound is active.
Might be somewhat arbitrary for the `trf` method as it generates a
sequence of strictly feasible iterates and active_mask is determined
within a tolerance threshold.
unbounded_sol : tuple
Unbounded least squares solution tuple returned by the least squares
solver (set with `lsq_solver` option). If `lsq_solver` is not set or is
set to ``'exact'``, the tuple contains an ndarray of shape (n,) with
the unbounded solution, an ndarray with the sum of squared residuals,
an int with the rank of `A`, and an ndarray with the singular values
of `A` (see NumPy's ``linalg.lstsq`` for more information). If
`lsq_solver` is set to ``'lsmr'``, the tuple contains an ndarray of
shape (n,) with the unbounded solution, an int with the exit code,
an int with the number of iterations, and five floats with
various norms and the condition number of `A` (see SciPy's
``sparse.linalg.lsmr`` for more information). This output can be
useful for determining the convergence of the least squares solver,
particularly the iterative ``'lsmr'`` solver. The unbounded least
squares problem is to minimize ``0.5 * ||A x - b||**2``.
nit : int
Number of iterations. Zero if the unconstrained solution is optimal.
status : int
Reason for algorithm termination:
* -1 : the algorithm was not able to make progress on the last
iteration.
* 0 : the maximum number of iterations is exceeded.
* 1 : the first-order optimality measure is less than `tol`.
* 2 : the relative change of the cost function is less than `tol`.
* 3 : the unconstrained solution is optimal.
message : str
Verbal description of the termination reason.
success : bool
True if one of the convergence criteria is satisfied (`status` > 0).
See Also
--------
nnls : Linear least squares with non-negativity constraint.
least_squares : Nonlinear least squares with bounds on the variables.
Notes
-----
The algorithm first computes the unconstrained least-squares solution by
`numpy.linalg.lstsq` or `scipy.sparse.linalg.lsmr` depending on
`lsq_solver`. This solution is returned as optimal if it lies within the
bounds.
Method 'trf' runs the adaptation of the algorithm described in [STIR]_ for
a linear least-squares problem. The iterations are essentially the same as
in the nonlinear least-squares algorithm, but as the quadratic function
model is always accurate, we don't need to track or modify the radius of
a trust region. The line search (backtracking) is used as a safety net
when a selected step does not decrease the cost function. Read more
detailed description of the algorithm in `scipy.optimize.least_squares`.
Method 'bvls' runs a Python implementation of the algorithm described in
[BVLS]_. The algorithm maintains active and free sets of variables, on
each iteration chooses a new variable to move from the active set to the
free set and then solves the unconstrained least-squares problem on free
variables. This algorithm is guaranteed to give an accurate solution
eventually, but may require up to n iterations for a problem with n
variables. Additionally, an ad-hoc initialization procedure is
implemented, that determines which variables to set free or active
initially. It takes some number of iterations before actual BVLS starts,
but can significantly reduce the number of further iterations.
References
----------
.. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
and Conjugate Gradient Method for Large-Scale Bound-Constrained
Minimization Problems," SIAM Journal on Scientific Computing,
Vol. 21, Number 1, pp 1-23, 1999.
.. [BVLS] P. B. Start and R. L. Parker, "Bounded-Variable Least-Squares:
an Algorithm and Applications", Computational Statistics, 10,
129-141, 1995.
Examples
--------
In this example, a problem with a large sparse arrays and bounds on the
variables is solved.
>>> import numpy as np
>>> from scipy.sparse import random_array
>>> from scipy.optimize import lsq_linear
>>> rng = np.random.default_rng()
...
>>> m = 2000
>>> n = 1000
...
>>> A = random_array((m, n), density=1e-4, random_state=rng)
>>> b = rng.standard_normal(m)
...
>>> lb = rng.standard_normal(n)
>>> ub = lb + 1
...
>>> res = lsq_linear(A, b, bounds=(lb, ub), lsmr_tol='auto', verbose=1)
The relative change of the cost function is less than `tol`.
Number of iterations 10, initial cost 1.0070e+03, final cost 9.6602e+02,
first-order optimality 2.21e-09. # may vary
"""
if method not in ['trf', 'bvls']:
raise ValueError("`method` must be 'trf' or 'bvls'")
if lsq_solver not in [None, 'exact', 'lsmr']:
raise ValueError("`solver` must be None, 'exact' or 'lsmr'.")
if verbose not in [0, 1, 2]:
raise ValueError("`verbose` must be in [0, 1, 2].")
if issparse(A):
A = csr_array(A)
elif not isinstance(A, LinearOperator):
A = np.atleast_2d(np.asarray(A))
if method == 'bvls':
if lsq_solver == 'lsmr':
raise ValueError("method='bvls' can't be used with "
"lsq_solver='lsmr'")
if not isinstance(A, np.ndarray):
raise ValueError("method='bvls' can't be used with `A` being "
"sparse or LinearOperator.")
if lsq_solver is None:
if isinstance(A, np.ndarray):
lsq_solver = 'exact'
else:
lsq_solver = 'lsmr'
elif lsq_solver == 'exact' and not isinstance(A, np.ndarray):
raise ValueError("`exact` solver can't be used when `A` is "
"sparse or LinearOperator.")
if len(A.shape) != 2: # No ndim for LinearOperator.
raise ValueError("`A` must have at most 2 dimensions.")
if max_iter is not None and max_iter <= 0:
raise ValueError("`max_iter` must be None or positive integer.")
m, n = A.shape
b = np.atleast_1d(b)
if b.ndim != 1:
raise ValueError("`b` must have at most 1 dimension.")
if b.size != m:
raise ValueError("Inconsistent shapes between `A` and `b`.")
if isinstance(bounds, Bounds):
lb = bounds.lb
ub = bounds.ub
else:
lb, ub = prepare_bounds(bounds, n)
if lb.shape != (n,) and ub.shape != (n,):
raise ValueError("Bounds have wrong shape.")
if np.any(lb >= ub):
raise ValueError("Each lower bound must be strictly less than each "
"upper bound.")
if lsmr_maxiter is not None and lsmr_maxiter < 1:
raise ValueError("`lsmr_maxiter` must be None or positive integer.")
if not ((isinstance(lsmr_tol, float) and lsmr_tol > 0) or
lsmr_tol in ('auto', None)):
raise ValueError("`lsmr_tol` must be None, 'auto', or positive float.")
if lsq_solver == 'exact':
unbd_lsq = np.linalg.lstsq(A, b, rcond=-1)
elif lsq_solver == 'lsmr':
first_lsmr_tol = lsmr_tol # tol of first call to lsmr
if lsmr_tol is None or lsmr_tol == 'auto':
first_lsmr_tol = 1e-2 * tol # default if lsmr_tol not defined
unbd_lsq = lsmr(A, b, maxiter=lsmr_maxiter,
atol=first_lsmr_tol, btol=first_lsmr_tol)
x_lsq = unbd_lsq[0] # extract the solution from the least squares solver
if in_bounds(x_lsq, lb, ub):
r = A @ x_lsq - b
cost = 0.5 * np.dot(r, r)
termination_status = 3
termination_message = TERMINATION_MESSAGES[termination_status]
g = compute_grad(A, r)
g_norm = norm(g, ord=np.inf)
if verbose > 0:
print(termination_message)
print(f"Final cost {cost:.4e}, first-order optimality {g_norm:.2e}")
return OptimizeResult(
x=x_lsq, fun=r, cost=cost, optimality=g_norm,
active_mask=np.zeros(n), unbounded_sol=unbd_lsq,
nit=0, status=termination_status,
message=termination_message, success=True)
if method == 'trf':
res = trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
max_iter, verbose, lsmr_maxiter=lsmr_maxiter)
elif method == 'bvls':
res = bvls(A, b, x_lsq, lb, ub, tol, max_iter, verbose)
res.unbounded_sol = unbd_lsq
res.message = TERMINATION_MESSAGES[res.status]
res.success = res.status > 0
if verbose > 0:
print(res.message)
print(
f"Number of iterations {res.nit}, initial cost {res.initial_cost:.4e}, "
f"final cost {res.cost:.4e}, first-order optimality {res.optimality:.2e}."
)
del res.initial_cost
return res

View File

@@ -0,0 +1,587 @@
"""Trust Region Reflective algorithm for least-squares optimization.
The algorithm is based on ideas from paper [STIR]_. The main idea is to
account for the presence of the bounds by appropriate scaling of the variables (or,
equivalently, changing a trust-region shape). Let's introduce a vector v:
| ub[i] - x[i], if g[i] < 0 and ub[i] < np.inf
v[i] = | x[i] - lb[i], if g[i] > 0 and lb[i] > -np.inf
| 1, otherwise
where g is the gradient of a cost function and lb, ub are the bounds. Its
components are distances to the bounds at which the anti-gradient points (if
this distance is finite). Define a scaling matrix D = diag(v**0.5).
First-order optimality conditions can be stated as
D^2 g(x) = 0.
Meaning that components of the gradient should be zero for strictly interior
variables, and components must point inside the feasible region for variables
on the bound.
Now consider this system of equations as a new optimization problem. If the
point x is strictly interior (not on the bound), then the left-hand side is
differentiable and the Newton step for it satisfies
(D^2 H + diag(g) Jv) p = -D^2 g
where H is the Hessian matrix (or its J^T J approximation in least squares),
Jv is the Jacobian matrix of v with components -1, 1 or 0, such that all
elements of matrix C = diag(g) Jv are non-negative. Introduce the change
of the variables x = D x_h (_h would be "hat" in LaTeX). In the new variables,
we have a Newton step satisfying
B_h p_h = -g_h,
where B_h = D H D + C, g_h = D g. In least squares B_h = J_h^T J_h, where
J_h = J D. Note that J_h and g_h are proper Jacobian and gradient with respect
to "hat" variables. To guarantee global convergence we formulate a
trust-region problem based on the Newton step in the new variables:
0.5 * p_h^T B_h p + g_h^T p_h -> min, ||p_h|| <= Delta
In the original space B = H + D^{-1} C D^{-1}, and the equivalent trust-region
problem is
0.5 * p^T B p + g^T p -> min, ||D^{-1} p|| <= Delta
Here, the meaning of the matrix D becomes more clear: it alters the shape
of a trust-region, such that large steps towards the bounds are not allowed.
In the implementation, the trust-region problem is solved in "hat" space,
but handling of the bounds is done in the original space (see below and read
the code).
The introduction of the matrix D doesn't allow to ignore bounds, the algorithm
must keep iterates strictly feasible (to satisfy aforementioned
differentiability), the parameter theta controls step back from the boundary
(see the code for details).
The algorithm does another important trick. If the trust-region solution
doesn't fit into the bounds, then a reflected (from a firstly encountered
bound) search direction is considered. For motivation and analysis refer to
[STIR]_ paper (and other papers of the authors). In practice, it doesn't need
a lot of justifications, the algorithm simply chooses the best step among
three: a constrained trust-region step, a reflected step and a constrained
Cauchy step (a minimizer along -g_h in "hat" space, or -D^2 g in the original
space).
Another feature is that a trust-region radius control strategy is modified to
account for appearance of the diagonal C matrix (called diag_h in the code).
Note that all described peculiarities are completely gone as we consider
problems without bounds (the algorithm becomes a standard trust-region type
algorithm very similar to ones implemented in MINPACK).
The implementation supports two methods of solving the trust-region problem.
The first, called 'exact', applies SVD on Jacobian and then solves the problem
very accurately using the algorithm described in [JJMore]_. It is not
applicable to large problem. The second, called 'lsmr', uses the 2-D subspace
approach (sometimes called "indefinite dogleg"), where the problem is solved
in a subspace spanned by the gradient and the approximate Gauss-Newton step
found by ``scipy.sparse.linalg.lsmr``. A 2-D trust-region problem is
reformulated as a 4th order algebraic equation and solved very accurately by
``numpy.roots``. The subspace approach allows to solve very large problems
(up to couple of millions of residuals on a regular PC), provided the Jacobian
matrix is sufficiently sparse.
References
----------
.. [STIR] Branch, M.A., T.F. Coleman, and Y. Li, "A Subspace, Interior,
and Conjugate Gradient Method for Large-Scale Bound-Constrained
Minimization Problems," SIAM Journal on Scientific Computing,
Vol. 21, Number 1, pp 1-23, 1999.
.. [JJMore] More, J. J., "The Levenberg-Marquardt Algorithm: Implementation
and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
"""
import numpy as np
from numpy.linalg import norm
from scipy.linalg import svd, qr
from scipy.sparse.linalg import lsmr
from scipy.optimize import OptimizeResult
from .common import (
step_size_to_bound, find_active_constraints, in_bounds,
make_strictly_feasible, intersect_trust_region, solve_lsq_trust_region,
solve_trust_region_2d, minimize_quadratic_1d, build_quadratic_1d,
evaluate_quadratic, right_multiplied_operator, regularized_lsq_operator,
CL_scaling_vector, compute_grad, compute_jac_scale, check_termination,
update_tr_radius, scale_for_robust_loss_function, print_header_nonlinear,
print_iteration_nonlinear)
from scipy._lib._util import _call_callback_maybe_halt
def trf(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
loss_function, tr_solver, tr_options, verbose, callback=None):
# For efficiency, it makes sense to run the simplified version of the
# algorithm when no bounds are imposed. We decided to write the two
# separate functions. It violates the DRY principle, but the individual
# functions are kept the most readable.
if np.all(lb == -np.inf) and np.all(ub == np.inf):
return trf_no_bounds(
fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev, x_scale,
loss_function, tr_solver, tr_options, verbose, callback=callback)
else:
return trf_bounds(
fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
loss_function, tr_solver, tr_options, verbose, callback=callback)
def select_step(x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta):
"""Select the best step according to Trust Region Reflective algorithm."""
if in_bounds(x + p, lb, ub):
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
return p, p_h, -p_value
p_stride, hits = step_size_to_bound(x, p, lb, ub)
# Compute the reflected direction.
r_h = np.copy(p_h)
r_h[hits.astype(bool)] *= -1
r = d * r_h
# Restrict trust-region step, such that it hits the bound.
p *= p_stride
p_h *= p_stride
x_on_bound = x + p
# Reflected direction will cross first either feasible region or trust
# region boundary.
_, to_tr = intersect_trust_region(p_h, r_h, Delta)
to_bound, _ = step_size_to_bound(x_on_bound, r, lb, ub)
# Find lower and upper bounds on a step size along the reflected
# direction, considering the strict feasibility requirement. There is no
# single correct way to do that, the chosen approach seems to work best
# on test problems.
r_stride = min(to_bound, to_tr)
if r_stride > 0:
r_stride_l = (1 - theta) * p_stride / r_stride
if r_stride == to_bound:
r_stride_u = theta * to_bound
else:
r_stride_u = to_tr
else:
r_stride_l = 0
r_stride_u = -1
# Check if reflection step is available.
if r_stride_l <= r_stride_u:
a, b, c = build_quadratic_1d(J_h, g_h, r_h, s0=p_h, diag=diag_h)
r_stride, r_value = minimize_quadratic_1d(
a, b, r_stride_l, r_stride_u, c=c)
r_h *= r_stride
r_h += p_h
r = r_h * d
else:
r_value = np.inf
# Now correct p_h to make it strictly interior.
p *= theta
p_h *= theta
p_value = evaluate_quadratic(J_h, g_h, p_h, diag=diag_h)
ag_h = -g_h
ag = d * ag_h
to_tr = Delta / norm(ag_h)
to_bound, _ = step_size_to_bound(x, ag, lb, ub)
if to_bound < to_tr:
ag_stride = theta * to_bound
else:
ag_stride = to_tr
a, b = build_quadratic_1d(J_h, g_h, ag_h, diag=diag_h)
ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride)
ag_h *= ag_stride
ag *= ag_stride
if p_value < r_value and p_value < ag_value:
return p, p_h, -p_value
elif r_value < p_value and r_value < ag_value:
return r, r_h, -r_value
else:
return ag, ag_h, -ag_value
def trf_bounds(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev,
x_scale, loss_function, tr_solver, tr_options, verbose,
callback=None):
x = x0.copy()
f = f0
f_true = f.copy()
nfev = 1
J = J0
njev = 1
m, n = J.shape
if loss_function is not None:
rho = loss_function(f)
cost = 0.5 * np.sum(rho[0])
J, f = scale_for_robust_loss_function(J, f, rho)
else:
cost = 0.5 * np.dot(f, f)
g = compute_grad(J, f)
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
if jac_scale:
scale, scale_inv = compute_jac_scale(J)
else:
scale, scale_inv = x_scale, 1 / x_scale
v, dv = CL_scaling_vector(x, g, lb, ub)
v[dv != 0] *= scale_inv[dv != 0]
Delta = norm(x0 * scale_inv / v**0.5)
if Delta == 0:
Delta = 1.0
g_norm = norm(g * v, ord=np.inf)
f_augmented = np.zeros(m + n)
if tr_solver == 'exact':
J_augmented = np.empty((m + n, n))
elif tr_solver == 'lsmr':
reg_term = 0.0
regularize = tr_options.pop('regularize', True)
if max_nfev is None:
max_nfev = x0.size * 100
alpha = 0.0 # "Levenberg-Marquardt" parameter
termination_status = None
iteration = 0
step_norm = None
actual_reduction = None
if verbose == 2:
print_header_nonlinear()
while True:
v, dv = CL_scaling_vector(x, g, lb, ub)
g_norm = norm(g * v, ord=np.inf)
if g_norm < gtol:
termination_status = 1
if verbose == 2:
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
step_norm, g_norm)
if termination_status is not None or nfev == max_nfev:
break
# Now compute variables in "hat" space. Here, we also account for
# scaling introduced by `x_scale` parameter. This part is a bit tricky,
# you have to write down the formulas and see how the trust-region
# problem is formulated when the two types of scaling are applied.
# The idea is that first we apply `x_scale` and then apply Coleman-Li
# approach in the new variables.
# v is recomputed in the variables after applying `x_scale`, note that
# components which were identically 1 not affected.
v[dv != 0] *= scale_inv[dv != 0]
# Here, we apply two types of scaling.
d = v**0.5 * scale
# C = diag(g * scale) Jv
diag_h = g * dv * scale
# After all this has been done, we continue normally.
# "hat" gradient.
g_h = d * g
f_augmented[:m] = f
if tr_solver == 'exact':
J_augmented[:m] = J * d
J_h = J_augmented[:m] # Memory view.
J_augmented[m:] = np.diag(diag_h**0.5)
U, s, V = svd(J_augmented, full_matrices=False)
V = V.T
uf = U.T.dot(f_augmented)
elif tr_solver == 'lsmr':
J_h = right_multiplied_operator(J, d)
if regularize:
a, b = build_quadratic_1d(J_h, g_h, -g_h, diag=diag_h)
to_tr = Delta / norm(g_h)
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
reg_term = -ag_value / Delta**2
lsmr_op = regularized_lsq_operator(J_h, (diag_h + reg_term)**0.5)
gn_h = lsmr(lsmr_op, f_augmented, **tr_options)[0]
S = np.vstack((g_h, gn_h)).T
S, _ = qr(S, mode='economic')
JS = J_h.dot(S) # LinearOperator does dot too.
B_S = np.dot(JS.T, JS) + np.dot(S.T * diag_h, S)
g_S = S.T.dot(g_h)
# theta controls step back step ratio from the bounds.
theta = max(0.995, 1 - g_norm)
actual_reduction = -1
while actual_reduction <= 0 and nfev < max_nfev:
if tr_solver == 'exact':
p_h, alpha, n_iter = solve_lsq_trust_region(
n, m, uf, s, V, Delta, initial_alpha=alpha)
elif tr_solver == 'lsmr':
p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
p_h = S.dot(p_S)
p = d * p_h # Trust-region solution in the original space.
step, step_h, predicted_reduction = select_step(
x, J_h, diag_h, g_h, p, p_h, d, Delta, lb, ub, theta)
x_new = make_strictly_feasible(x + step, lb, ub, rstep=0)
f_new = fun(x_new)
nfev += 1
step_h_norm = norm(step_h)
if not np.all(np.isfinite(f_new)):
Delta = 0.25 * step_h_norm
continue
# Usual trust-region step quality estimation.
if loss_function is not None:
cost_new = loss_function(f_new, cost_only=True)
else:
cost_new = 0.5 * np.dot(f_new, f_new)
actual_reduction = cost - cost_new
Delta_new, ratio = update_tr_radius(
Delta, actual_reduction, predicted_reduction,
step_h_norm, step_h_norm > 0.95 * Delta)
step_norm = norm(step)
termination_status = check_termination(
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
if termination_status is not None:
break
alpha *= Delta / Delta_new
Delta = Delta_new
if actual_reduction > 0:
x = x_new
f = f_new
f_true = f.copy()
cost = cost_new
J = jac(x)
njev += 1
if loss_function is not None:
rho = loss_function(f)
J, f = scale_for_robust_loss_function(J, f, rho)
g = compute_grad(J, f)
if jac_scale:
scale, scale_inv = compute_jac_scale(J, scale_inv)
else:
step_norm = 0
actual_reduction = 0
iteration += 1
# Call callback function and possibly stop optimization
if callback is not None:
intermediate_result = OptimizeResult(
x=x, fun=f_true, nit=iteration, nfev=nfev)
intermediate_result["cost"] = cost
if _call_callback_maybe_halt(
callback, intermediate_result
):
termination_status = -2
break
if termination_status is None:
termination_status = 0
active_mask = find_active_constraints(x, lb, ub, rtol=xtol)
return OptimizeResult(
x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
active_mask=active_mask, nfev=nfev, njev=njev,
status=termination_status)
def trf_no_bounds(fun, jac, x0, f0, J0, ftol, xtol, gtol, max_nfev,
x_scale, loss_function, tr_solver, tr_options, verbose,
callback=None):
x = x0.copy()
f = f0
f_true = f.copy()
nfev = 1
J = J0
njev = 1
m, n = J.shape
if loss_function is not None:
rho = loss_function(f)
cost = 0.5 * np.sum(rho[0])
J, f = scale_for_robust_loss_function(J, f, rho)
else:
cost = 0.5 * np.dot(f, f)
g = compute_grad(J, f)
jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
if jac_scale:
scale, scale_inv = compute_jac_scale(J)
else:
scale, scale_inv = x_scale, 1 / x_scale
Delta = norm(x0 * scale_inv)
if Delta == 0:
Delta = 1.0
if tr_solver == 'lsmr':
reg_term = 0
damp = tr_options.pop('damp', 0.0)
regularize = tr_options.pop('regularize', True)
if max_nfev is None:
max_nfev = x0.size * 100
alpha = 0.0 # "Levenberg-Marquardt" parameter
termination_status = None
iteration = 0
step_norm = None
actual_reduction = None
if verbose == 2:
print_header_nonlinear()
while True:
g_norm = norm(g, ord=np.inf)
if g_norm < gtol:
termination_status = 1
if verbose == 2:
print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
step_norm, g_norm)
if termination_status is not None or nfev == max_nfev:
break
d = scale
g_h = d * g
if tr_solver == 'exact':
J_h = J * d
U, s, V = svd(J_h, full_matrices=False)
V = V.T
uf = U.T.dot(f)
elif tr_solver == 'lsmr':
J_h = right_multiplied_operator(J, d)
if regularize:
a, b = build_quadratic_1d(J_h, g_h, -g_h)
to_tr = Delta / norm(g_h)
ag_value = minimize_quadratic_1d(a, b, 0, to_tr)[1]
reg_term = -ag_value / Delta**2
damp_full = (damp**2 + reg_term)**0.5
gn_h = lsmr(J_h, f, damp=damp_full, **tr_options)[0]
S = np.vstack((g_h, gn_h)).T
S, _ = qr(S, mode='economic')
JS = J_h.dot(S)
B_S = np.dot(JS.T, JS)
g_S = S.T.dot(g_h)
actual_reduction = -1
while actual_reduction <= 0 and nfev < max_nfev:
if tr_solver == 'exact':
step_h, alpha, n_iter = solve_lsq_trust_region(
n, m, uf, s, V, Delta, initial_alpha=alpha)
elif tr_solver == 'lsmr':
p_S, _ = solve_trust_region_2d(B_S, g_S, Delta)
step_h = S.dot(p_S)
predicted_reduction = -evaluate_quadratic(J_h, g_h, step_h)
step = d * step_h
x_new = x + step
f_new = fun(x_new)
nfev += 1
step_h_norm = norm(step_h)
if not np.all(np.isfinite(f_new)):
Delta = 0.25 * step_h_norm
continue
# Usual trust-region step quality estimation.
if loss_function is not None:
cost_new = loss_function(f_new, cost_only=True)
else:
cost_new = 0.5 * np.dot(f_new, f_new)
actual_reduction = cost - cost_new
Delta_new, ratio = update_tr_radius(
Delta, actual_reduction, predicted_reduction,
step_h_norm, step_h_norm > 0.95 * Delta)
step_norm = norm(step)
termination_status = check_termination(
actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)
if termination_status is not None:
break
alpha *= Delta / Delta_new
Delta = Delta_new
if actual_reduction > 0:
x = x_new
f = f_new
f_true = f.copy()
cost = cost_new
J = jac(x)
njev += 1
if loss_function is not None:
rho = loss_function(f)
J, f = scale_for_robust_loss_function(J, f, rho)
g = compute_grad(J, f)
if jac_scale:
scale, scale_inv = compute_jac_scale(J, scale_inv)
else:
step_norm = 0
actual_reduction = 0
iteration += 1
# Call callback function and possibly stop optimization
if callback is not None:
intermediate_result = OptimizeResult(
x=x, fun=f_true, nit=iteration, nfev=nfev)
intermediate_result["cost"] = cost
if _call_callback_maybe_halt(
callback, intermediate_result
):
termination_status = -2
break
if termination_status is None:
termination_status = 0
active_mask = np.zeros_like(x)
return OptimizeResult(
x=x, cost=cost, fun=f_true, jac=J, grad=g, optimality=g_norm,
active_mask=active_mask, nfev=nfev, njev=njev,
status=termination_status)

View File

@@ -0,0 +1,249 @@
"""The adaptation of Trust Region Reflective algorithm for a linear
least-squares problem."""
import numpy as np
from numpy.linalg import norm
from scipy.linalg import qr, solve_triangular
from scipy.sparse.linalg import lsmr
from scipy.optimize import OptimizeResult
from .givens_elimination import givens_elimination
from .common import (
EPS, step_size_to_bound, find_active_constraints, in_bounds,
make_strictly_feasible, build_quadratic_1d, evaluate_quadratic,
minimize_quadratic_1d, CL_scaling_vector, reflective_transformation,
print_header_linear, print_iteration_linear, compute_grad,
regularized_lsq_operator, right_multiplied_operator)
def regularized_lsq_with_qr(m, n, R, QTb, perm, diag, copy_R=True):
"""Solve regularized least squares using information from QR-decomposition.
The initial problem is to solve the following system in a least-squares
sense::
A x = b
D x = 0
where D is diagonal matrix. The method is based on QR decomposition
of the form A P = Q R, where P is a column permutation matrix, Q is an
orthogonal matrix and R is an upper triangular matrix.
Parameters
----------
m, n : int
Initial shape of A.
R : ndarray, shape (n, n)
Upper triangular matrix from QR decomposition of A.
QTb : ndarray, shape (n,)
First n components of Q^T b.
perm : ndarray, shape (n,)
Array defining column permutation of A, such that ith column of
P is perm[i]-th column of identity matrix.
diag : ndarray, shape (n,)
Array containing diagonal elements of D.
Returns
-------
x : ndarray, shape (n,)
Found least-squares solution.
"""
if copy_R:
R = R.copy()
v = QTb.copy()
givens_elimination(R, v, diag[perm])
abs_diag_R = np.abs(np.diag(R))
threshold = EPS * max(m, n) * np.max(abs_diag_R)
nns, = np.nonzero(abs_diag_R > threshold)
R = R[np.ix_(nns, nns)]
v = v[nns]
x = np.zeros(n)
x[perm[nns]] = solve_triangular(R, v)
return x
def backtracking(A, g, x, p, theta, p_dot_g, lb, ub):
"""Find an appropriate step size using backtracking line search."""
alpha = 1
while True:
x_new, _ = reflective_transformation(x + alpha * p, lb, ub)
step = x_new - x
cost_change = -evaluate_quadratic(A, g, step)
if cost_change > -0.1 * alpha * p_dot_g:
break
alpha *= 0.5
active = find_active_constraints(x_new, lb, ub)
if np.any(active != 0):
x_new, _ = reflective_transformation(x + theta * alpha * p, lb, ub)
x_new = make_strictly_feasible(x_new, lb, ub, rstep=0)
step = x_new - x
cost_change = -evaluate_quadratic(A, g, step)
return x, step, cost_change
def select_step(x, A_h, g_h, c_h, p, p_h, d, lb, ub, theta):
"""Select the best step according to Trust Region Reflective algorithm."""
if in_bounds(x + p, lb, ub):
return p
p_stride, hits = step_size_to_bound(x, p, lb, ub)
r_h = np.copy(p_h)
r_h[hits.astype(bool)] *= -1
r = d * r_h
# Restrict step, such that it hits the bound.
p *= p_stride
p_h *= p_stride
x_on_bound = x + p
# Find the step size along reflected direction.
r_stride_u, _ = step_size_to_bound(x_on_bound, r, lb, ub)
# Stay interior.
r_stride_l = (1 - theta) * r_stride_u
r_stride_u *= theta
if r_stride_u > 0:
a, b, c = build_quadratic_1d(A_h, g_h, r_h, s0=p_h, diag=c_h)
r_stride, r_value = minimize_quadratic_1d(
a, b, r_stride_l, r_stride_u, c=c)
r_h = p_h + r_h * r_stride
r = d * r_h
else:
r_value = np.inf
# Now correct p_h to make it strictly interior.
p_h *= theta
p *= theta
p_value = evaluate_quadratic(A_h, g_h, p_h, diag=c_h)
ag_h = -g_h
ag = d * ag_h
ag_stride_u, _ = step_size_to_bound(x, ag, lb, ub)
ag_stride_u *= theta
a, b = build_quadratic_1d(A_h, g_h, ag_h, diag=c_h)
ag_stride, ag_value = minimize_quadratic_1d(a, b, 0, ag_stride_u)
ag *= ag_stride
if p_value < r_value and p_value < ag_value:
return p
elif r_value < p_value and r_value < ag_value:
return r
else:
return ag
def trf_linear(A, b, x_lsq, lb, ub, tol, lsq_solver, lsmr_tol,
max_iter, verbose, *, lsmr_maxiter=None):
m, n = A.shape
x, _ = reflective_transformation(x_lsq, lb, ub)
x = make_strictly_feasible(x, lb, ub, rstep=0.1)
if lsq_solver == 'exact':
QT, R, perm = qr(A, mode='economic', pivoting=True)
QT = QT.T
if m < n:
R = np.vstack((R, np.zeros((n - m, n))))
QTr = np.zeros(n)
k = min(m, n)
elif lsq_solver == 'lsmr':
r_aug = np.zeros(m + n)
auto_lsmr_tol = False
if lsmr_tol is None:
lsmr_tol = 1e-2 * tol
elif lsmr_tol == 'auto':
auto_lsmr_tol = True
r = A.dot(x) - b
g = compute_grad(A, r)
cost = 0.5 * np.dot(r, r)
initial_cost = cost
termination_status = None
step_norm = None
cost_change = None
if max_iter is None:
max_iter = 100
if verbose == 2:
print_header_linear()
for iteration in range(max_iter):
v, dv = CL_scaling_vector(x, g, lb, ub)
g_scaled = g * v
g_norm = norm(g_scaled, ord=np.inf)
if g_norm < tol:
termination_status = 1
if verbose == 2:
print_iteration_linear(iteration, cost, cost_change,
step_norm, g_norm)
if termination_status is not None:
break
diag_h = g * dv
diag_root_h = diag_h ** 0.5
d = v ** 0.5
g_h = d * g
A_h = right_multiplied_operator(A, d)
if lsq_solver == 'exact':
QTr[:k] = QT.dot(r)
p_h = -regularized_lsq_with_qr(m, n, R * d[perm], QTr, perm,
diag_root_h, copy_R=False)
elif lsq_solver == 'lsmr':
lsmr_op = regularized_lsq_operator(A_h, diag_root_h)
r_aug[:m] = r
if auto_lsmr_tol:
eta = 1e-2 * min(0.5, g_norm)
lsmr_tol = max(EPS, min(0.1, eta * g_norm))
p_h = -lsmr(lsmr_op, r_aug, maxiter=lsmr_maxiter,
atol=lsmr_tol, btol=lsmr_tol)[0]
p = d * p_h
p_dot_g = np.dot(p, g)
if p_dot_g > 0:
termination_status = -1
theta = 1 - min(0.005, g_norm)
step = select_step(x, A_h, g_h, diag_h, p, p_h, d, lb, ub, theta)
cost_change = -evaluate_quadratic(A, g, step)
# Perhaps almost never executed, the idea is that `p` is descent
# direction thus we must find acceptable cost decrease using simple
# "backtracking", otherwise the algorithm's logic would break.
if cost_change < 0:
x, step, cost_change = backtracking(
A, g, x, p, theta, p_dot_g, lb, ub)
else:
x = make_strictly_feasible(x + step, lb, ub, rstep=0)
step_norm = norm(step)
r = A.dot(x) - b
g = compute_grad(A, r)
if cost_change < tol * cost:
termination_status = 2
cost = 0.5 * np.dot(r, r)
if termination_status is None:
termination_status = 0
active_mask = find_active_constraints(x, lb, ub, rtol=tol)
return OptimizeResult(
x=x, fun=r, cost=cost, optimality=g_norm, active_mask=active_mask,
nit=iteration + 1, status=termination_status,
initial_cost=initial_cost)