add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,212 @@
# Bounds may appear unused in this file but we need to import it to make it available to the user
from scipy.optimize import NonlinearConstraint, LinearConstraint, Bounds
from .common._nonlinear_constraints import process_nl_constraints
from .common._linear_constraints import (
combine_multiple_linear_constraints,
separate_LC_into_eq_and_ineq,
)
from .common._bounds import process_bounds
from enum import Enum
from .common._project import _project
from .common.linalg import get_arrays_tol
from .cobyla.cobyla import cobyla
import numpy as np
from collections.abc import Iterable
class ConstraintType(Enum):
    """Tags for the four accepted constraint representations."""
    LINEAR_OBJECT = 5
    NONLINEAR_OBJECT = 10
    LINEAR_DICT = 15
    NONLINEAR_DICT = 20


def get_constraint_type(constraint):
    """Classify ``constraint`` as one of the ConstraintType members.

    Dicts are recognized by their keys ("A" or "fun" together with "lb"
    and "ub"); any other object is recognized by the same names as
    attributes (e.g. scipy's LinearConstraint / NonlinearConstraint).
    Raises ValueError for anything unrecognized.
    """
    # Dict forms are checked first, and the linear form before the
    # nonlinear one, matching the precedence of the original logic.
    if isinstance(constraint, dict):
        if {"A", "lb", "ub"} <= constraint.keys():
            return ConstraintType.LINEAR_DICT
        if {"fun", "lb", "ub"} <= constraint.keys():
            return ConstraintType.NONLINEAR_DICT
    if all(hasattr(constraint, name) for name in ("A", "lb", "ub")):
        return ConstraintType.LINEAR_OBJECT
    if all(hasattr(constraint, name) for name in ("fun", "lb", "ub")):
        return ConstraintType.NONLINEAR_OBJECT
    raise ValueError(f"Constraint type {type(constraint)} not recognized")
def process_constraints(constraints):
    """Split ``constraints`` into one linear constraint and one nonlinear
    constraint function.

    Parameters
    ----------
    constraints : a single constraint, an iterable of constraints, or empty.
        Each constraint is either an object with attributes ``A``/``lb``/``ub``
        (linear, e.g. scipy's LinearConstraint) or ``fun``/``lb``/``ub``
        (nonlinear, e.g. scipy's NonlinearConstraint), or a dict with the same
        keys.

    Returns
    -------
    (linear_constraint, nonlinear_constraint_function)
        Either element is None when no constraint of that kind was supplied.
    """
    # First throw it back if it's an empty tuple/list/None.
    if not constraints:
        return None, None
    # Next figure out if it's a list of constraints or a single constraint.
    # If it's a single constraint, make it a list, and then the remaining logic
    # doesn't have to change.
    # BUGFIX: a dict is itself Iterable (over its keys), so a bare dict
    # constraint must be wrapped explicitly; otherwise the loop below would
    # iterate over the strings "A"/"fun"/"lb"/"ub" and raise ValueError.
    if isinstance(constraints, dict) or not isinstance(constraints, Iterable):
        constraints = [constraints]
    # Separate out the linear and nonlinear constraints, converting dict forms
    # to the scipy objects so downstream code sees a single representation.
    linear_constraints = []
    nonlinear_constraints = []
    for constraint in constraints:
        constraint_type = get_constraint_type(constraint)
        if constraint_type is ConstraintType.LINEAR_OBJECT:
            linear_constraints.append(constraint)
        elif constraint_type is ConstraintType.NONLINEAR_OBJECT:
            nonlinear_constraints.append(constraint)
        elif constraint_type is ConstraintType.LINEAR_DICT:
            linear_constraints.append(LinearConstraint(constraint["A"], constraint["lb"], constraint["ub"]))
        elif constraint_type is ConstraintType.NONLINEAR_DICT:
            nonlinear_constraints.append(NonlinearConstraint(constraint["fun"], constraint["lb"], constraint["ub"]))
        else:
            # Defensive: unreachable, since get_constraint_type raises for
            # unrecognized inputs and the enum above is closed.
            raise ValueError("Constraint type not recognized")
    if len(nonlinear_constraints) > 0:
        nonlinear_constraint_function = process_nl_constraints(nonlinear_constraints)
    else:
        nonlinear_constraint_function = None
    # Determine if we have multiple linear constraints, just 1, or none, and process accordingly
    if len(linear_constraints) > 1:
        linear_constraint = combine_multiple_linear_constraints(linear_constraints)
    elif len(linear_constraints) == 1:
        linear_constraint = linear_constraints[0]
    else:
        linear_constraint = None
    return linear_constraint, nonlinear_constraint_function
def minimize(fun, x0, args=(), method=None, bounds=None, constraints=(), callback=None, options=None):
    """Minimize a scalar function of one or more variables with a PRIMA
    derivative-free solver.

    Parameters
    ----------
    fun : callable
        Objective function, called as ``fun(x, *args)`` and returning a scalar.
    x0 : array_like or scalar
        Starting point. A scalar is promoted to a 1-element array and ``fun``
        is wrapped accordingly.
    args : tuple
        Extra arguments forwarded to ``fun``.
    method : str or None
        One of 'newuoa', 'uobyqa', 'bobyqa', 'lincoa', 'cobyla' (case
        insensitive). When None, the most specialized solver able to handle
        the supplied bounds/constraints is selected automatically.
    bounds, constraints, callback : optional
        Bounds, scipy-style constraints, and a progress callback.
    options : dict or None
        May contain 'quiet' (default True) and 'maxfev' (alias of 'maxfun');
        remaining entries are forwarded to the backend.

    Returns
    -------
    COBYLAResult with the approximate minimizer in ``result.x``.

    N.B.: in this translation the computation is always delegated to the
    COBYLA backend below, whichever method name was selected/validated.
    """
    linear_constraint, nonlinear_constraint_function = process_constraints(constraints)
    # BUGFIX: work on a copy of the options so that the 'quiet'/'maxfev'
    # deletions below do not mutate the caller's dict.
    options = {'quiet': True} if options is None else dict(options)
    quiet = options.get("quiet", True)
    if method is None:
        if nonlinear_constraint_function is not None:
            if not quiet: print("Nonlinear constraints detected, applying COBYLA")
            method = "cobyla"
        elif linear_constraint is not None:
            if not quiet: print("Linear constraints detected without nonlinear constraints, applying LINCOA")
            method = "lincoa"
        elif bounds is not None:
            if not quiet: print("Bounds without linear or nonlinear constraints detected, applying BOBYQA")
            method = "bobyqa"
        else:
            if not quiet: print("No bounds or constraints detected, applying NEWUOA")
            method = "newuoa"
    else:
        # Raise some errors if methods were called with inappropriate options
        method = method.lower()
        if method not in ('newuoa', 'uobyqa', 'bobyqa', 'cobyla', 'lincoa'):
            raise ValueError(f"Method must be one of NEWUOA, UOBYQA, BOBYQA, COBYLA, or LINCOA, not '{method}'")
        if method != "cobyla" and nonlinear_constraint_function is not None:
            raise ValueError("Nonlinear constraints were provided for an algorithm that cannot handle them")
        if method not in ("cobyla", "lincoa") and linear_constraint is not None:
            raise ValueError("Linear constraints were provided for an algorithm that cannot handle them")
        if method not in ("cobyla", "bobyqa", "lincoa") and bounds is not None:
            raise ValueError("Bounds were provided for an algorithm that cannot handle them")
    # Try to get the length of x0. If we can't that likely means it's a scalar, and
    # in that case we turn it into an array and wrap the original function so that it
    # can accept an array and return a scalar.
    try:
        lenx0 = len(x0)
    except TypeError:
        x0 = np.array([x0])
        original_scalar_fun = fun
        # BUGFIX: forward extra arguments instead of baking `args` in here.
        # calcfc below calls `fun(x, *args)`, so the previous one-parameter
        # wrapper raised TypeError whenever `args` was non-empty (and would
        # have applied `args` twice had it accepted them).
        def scalar_fun(x, *fun_args):
            return original_scalar_fun(x[0], *fun_args)
        fun = scalar_fun
        lenx0 = 1
    lb, ub = process_bounds(bounds, lenx0)
    # Check which variables are fixed and eliminate them from the problem.
    # Save the indices and values so that we can call the original function with
    # an array of the appropriate size, and so that we can add the fixed values to the
    # result when COBYLA returns.
    tol = get_arrays_tol(lb, ub)
    _fixed_idx = (
        (lb <= ub)
        & (np.abs(lb - ub) < tol)
    )
    if any(_fixed_idx):
        # Fix each such variable at the midpoint of its (nearly equal) bounds,
        # clipped back into [lb, ub] to guard against rounding.
        _fixed_values = 0.5 * (
            lb[_fixed_idx] + ub[_fixed_idx]
        )
        _fixed_values = np.clip(
            _fixed_values,
            lb[_fixed_idx],
            ub[_fixed_idx],
        )
        x0 = x0[~_fixed_idx]
        lb = lb[~_fixed_idx]
        ub = ub[~_fixed_idx]
        original_fun = fun
        # BUGFIX: same forwarding fix as scalar_fun above — calcfc passes
        # `*args`, so this wrapper must accept and forward them.
        def fixed_fun(x, *fun_args):
            newx = np.zeros(lenx0)
            newx[_fixed_idx] = _fixed_values
            newx[~_fixed_idx] = x
            return original_fun(newx, *fun_args)
        fun = fixed_fun
    # Project x0 onto the feasible set
    if nonlinear_constraint_function is None:
        result = _project(x0, lb, ub, {"linear": linear_constraint, "nonlinear": None})
        x0 = result.x
    if linear_constraint is not None:
        A_eq, b_eq, A_ineq, b_ineq = separate_LC_into_eq_and_ineq(linear_constraint)
    else:
        A_eq, b_eq, A_ineq, b_ineq = None, None, None, None
    if nonlinear_constraint_function is not None:
        # If there is a nonlinear constraint function, we will call COBYLA, which needs the number of nonlinear
        # constraints (m_nlcon). In order to get this number we need to evaluate the constraint function at x0.
        # The constraint value at x0 (nlconstr0) is not discarded but passed down to the Fortran backend, as its
        # evaluation is assumed to be expensive. We also evaluate the objective function at x0 and pass the result
        # (f0) down to the Fortran backend, which expects nlconstr0 and f0 to be provided in sync.
        def calcfc(x):
            f = fun(x, *args)
            nlconstr = nonlinear_constraint_function(x)
            return f, nlconstr
    else:
        def calcfc(x):
            f = fun(x, *args)
            constr = np.zeros(0)
            return f, constr
    f0, nlconstr0 = calcfc(x0)
    # Normalize option names for the backend: 'quiet' is consumed here and
    # 'maxfev' is an alias of the backend's 'maxfun'.
    if 'quiet' in options:
        del options['quiet']
    if 'maxfev' in options:
        options['maxfun'] = options['maxfev']
        del options['maxfev']
    result = cobyla(
        calcfc,
        len(nlconstr0),
        x0,
        A_ineq,
        b_ineq,
        A_eq,
        b_eq,
        lb,
        ub,
        f0=f0,
        nlconstr0=nlconstr0,
        callback=callback,
        **options
    )
    # Re-insert the fixed variables so the returned point has the original size.
    if any(_fixed_idx):
        newx = np.zeros(lenx0)
        newx[_fixed_idx] = _fixed_values
        newx[~_fixed_idx] = result.x
        result.x = newx
    return result

View File

@@ -0,0 +1,559 @@
'''
This module provides Powell's COBYLA algorithm.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
N.B.:
1. The modern-Fortran reference implementation in PRIMA contains bug fixes and improvements over the
original Fortran 77 implementation by Powell. Consequently, the PRIMA implementation behaves differently
from the original Fortran 77 implementation by Powell. Therefore, it is important to point out that
you are using PRIMA rather than the original solvers if you want your results to be reproducible.
2. Compared to Powell's Fortran 77 implementation, the modern-Fortran implementation and hence any
faithful translation like this one generally produce better solutions with fewer function evaluations,
making them preferable for applications with expensive function evaluations. However, if function
evaluations are not the dominant cost in your application, the Fortran 77 solvers are likely to be
faster, as they are more efficient in terms of memory usage and flops thanks to the careful and
ingenious (but unmaintained and unmaintainable) implementation by Powell.
See the PRIMA documentation (www.libprima.net) for more information.
'''
from ..common.evaluate import evaluate, moderatex, moderatef, moderatec
from ..common.consts import (EPS, RHOBEG_DEFAULT, RHOEND_DEFAULT, CTOL_DEFAULT,
CWEIGHT_DEFAULT, FTARGET_DEFAULT, IPRINT_DEFAULT,
MAXFUN_DIM_DEFAULT, DEBUGGING, BOUNDMAX,
ETA1_DEFAULT, ETA2_DEFAULT, GAMMA1_DEFAULT,
GAMMA2_DEFAULT)
from ..common.preproc import preproc
from ..common.present import present
from ..common.linalg import matprod
from .cobylb import cobylb
import numpy as np
from dataclasses import dataclass
from copy import copy
@dataclass
class COBYLAResult:
    """Outputs of the COBYLA solver; see the `cobyla` docstring for details."""
    x: np.ndarray               # approximate minimizer
    f: float                    # objective function value at x
    constr: np.ndarray          # constraint values at x
    cstrv: float                # constraint violation at x
    nf: int                     # number of calls of the objective/constraint function
    xhist: np.ndarray | None    # history of iterates, shape (N, min(nf, maxhist))
    fhist: np.ndarray | None    # history of objective values, length min(nf, maxhist)
    chist: np.ndarray | None    # history of constraint violations
    conhist: np.ndarray | None  # history of constraint values, shape (M, min(nf, maxhist))
    info: int                   # exit flag (values defined in common/infos.py)
def cobyla(calcfc, m_nlcon, x, Aineq=None, bineq=None, Aeq=None, beq=None,
           xl=None, xu=None, f0=None, nlconstr0=None, rhobeg=None, rhoend=None,
           ftarget=FTARGET_DEFAULT, ctol=CTOL_DEFAULT, cweight=CWEIGHT_DEFAULT,
           maxfun=None, iprint=IPRINT_DEFAULT, eta1=None, eta2=None,
           gamma1=GAMMA1_DEFAULT, gamma2=GAMMA2_DEFAULT, maxhist=None, maxfilt=2000,
           callback=None):
    """
    Among all the arguments, only CALCFC, M_NLCON, and X are obligatory. The others are
    OPTIONAL and you can neglect them unless you are familiar with the algorithm. Any
    unspecified optional input will take the default value detailed below. For
    instance, we may invoke the solver as follows.
    # First define CALCFC, M_NLCON, and X, and then do the following.
    result = cobyla(calcfc, m_nlcon, x)
    or
    # First define CALCFC, M_NLCON, X, Aineq, and Bineq, and then do the following.
    result = cobyla(calcfc, m_nlcon, x, Aineq=Aineq, bineq=bineq, rhobeg=1.0e0,
                    rhoend=1.0e-6)
    ####################################################################################
    # IMPORTANT NOTICE: The user must set M_NLCON correctly to the number of nonlinear
    # constraints, namely the size of NLCONSTR introduced below. Set it to 0 if there
    # is no nonlinear constraint.
    ####################################################################################
    See examples/cobyla/cobyla_example.py for a concrete example.
    A detailed introduction to the arguments is as follows.
    ####################################################################################
    # INPUTS
    ####################################################################################
    CALCFC
      Input, function.
      f, nlconstr = CALCFC(X) should evaluate the objective function and nonlinear
      constraints at the given vector X; it should return a tuple consisting of the
      objective function value and the nonlinear constraint value. It must be provided
      by the user, and its definition must conform to the following interface:
      #-------------------------------------------------------------------------#
       def calcfc(x):
           f = 0.0
           nlconstr = np.zeros(m_nlcon)
           return f, nlconstr
      #-------------------------------------------------------------------------#
    M_NLCON
      Input, scalar.
      M_NLCON must be set to the number of nonlinear constraints, namely the size of
      NLCONSTR(X).
      N.B.:
      1. Why don't we define M_NLCON as optional and default it to 0 when it is absent?
      This is because we need to allocate memory for CONSTR_LOC using M_NLCON. To
      ensure that the size of CONSTR_LOC is correct, we require the user to specify
      M_NLCON explicitly.
    X
      Input, vector.
      As an input, X should be an N-dimensional vector that contains the starting
      point, N being the dimension of the problem.
    Aineq, Bineq
      Input, matrix of size [Mineq, N] and vector of size Mineq unless they are both
      empty, default: None and None.
      Aineq and Bineq represent the linear inequality constraints: Aineq*X <= Bineq.
    Aeq, Beq
      Input, matrix of size [Meq, N] and vector of size Meq unless they are both
      empty, default: None and None.
      Aeq and Beq represent the linear equality constraints: Aeq*X = Beq.
    XL, XU
      Input, vectors of size N unless they are both None, default: None and None.
      XL is the lower bound for X. If XL is None, X has no
      lower bound. Any entry of XL that is NaN or below -BOUNDMAX will be taken as
      -BOUNDMAX, which effectively means there is no lower bound for the corresponding
      entry of X. The value of BOUNDMAX is 0.25*HUGE(X), which is about 8.6E37 for
      single precision and 4.5E307 for double precision. XU is similar.
    F0
      Input, scalar.
      F0, if present, should be set to the objective function value of the starting X.
    NLCONSTR0
      Input, vector.
      NLCONSTR0, if present, should be set to the nonlinear constraint value at the
      starting X; in addition, SIZE(NLCONSTR0) must be M_NLCON, or the solver will
      abort.
    RHOBEG, RHOEND
      Inputs, scalars, default: RHOBEG = 1, RHOEND = 10^-6. RHOBEG and RHOEND must be
      set to the initial and final values of a trust-region radius, both being positive
      and RHOEND <= RHOBEG. Typically RHOBEG should be about one tenth of the greatest
      expected change to a variable, and RHOEND should indicate the accuracy that is
      required in the final values of the variables.
    FTARGET
      Input, scalar, default: -Inf.
      FTARGET is the target function value. The algorithm will terminate when a
      feasible point with a function value <= FTARGET is found.
    CTOL
      Input, scalar, default: sqrt(machine epsilon).
      CTOL is the tolerance of constraint violation. X is considered feasible if
      CSTRV(X) <= CTOL.
      N.B.:
      1. CTOL is absolute, not relative.
      2. CTOL is used only when selecting the returned X. It does not affect the
      iterations of the algorithm.
    CWEIGHT
      Input, scalar, default: CWEIGHT_DFT defined in common/consts.py.
      CWEIGHT is the weight that the constraint violation takes in the selection of the
      returned X.
    MAXFUN
      Input, integer scalar, default: MAXFUN_DIM_DFT*N with MAXFUN_DIM_DFT defined in
      common/consts.py. MAXFUN is the maximal number of calls of CALCFC.
    IPRINT
      Input, integer scalar, default: 0.
      The value of IPRINT should be set to 0, 1, -1, 2, -2, 3, or -3, which controls
      how much information will be printed during the computation:
      0: there will be no printing;
      1: a message will be printed to the screen at the return, showing the best vector
      of variables found and its objective function value;
      2: in addition to 1, each new value of RHO is printed to the screen, with the
      best vector of variables so far and its objective function value; each new
      value of CPEN is also printed;
      3: in addition to 2, each function evaluation with its variables will be printed
      to the screen; -1, -2, -3: the same information as 1, 2, 3 will be printed,
      not to the screen but to a file named COBYLA_output.txt; the file will be
      created if it does not exist; the new output will be appended to the end of
      this file if it already exists.
      Note that IPRINT = +/-3 can be costly in terms of time and/or space.
    ETA1, ETA2, GAMMA1, GAMMA2
      Input, scalars, default: ETA1 = 0.1, ETA2 = 0.7, GAMMA1 = 0.5, and GAMMA2 = 2.
      ETA1, ETA2, GAMMA1, and GAMMA2 are parameters in the updating scheme of the
      trust-region radius detailed in the subroutine TRRAD in trustregion.py. Roughly
      speaking, the trust-region radius is contracted by a factor of GAMMA1 when the
      reduction ratio is below ETA1, and enlarged by a factor of GAMMA2 when the
      reduction ratio is above ETA2. It is required that 0 < ETA1 <= ETA2 < 1 and
      0 < GAMMA1 < 1 < GAMMA2. Normally, ETA1 <= 0.25. It is NOT advised to set
      ETA1 >= 0.5.
    MAXFILT
      Input, scalar.
      MAXFILT is a nonnegative integer indicating the maximal length of the filter used
      for selecting the returned solution; default: MAXFILT_DFT (a value lower than
      MIN_MAXFILT is not recommended);
      see common/consts.py for the definitions of MAXFILT_DFT and MIN_MAXFILT.
    CALLBACK
      Input, function to report progress and optionally request termination.
    ####################################################################################
    # OUTPUTS
    ####################################################################################
    The output is a single data structure, COBYLAResult, with the following fields:
    X
      Output, vector.
      As an output, X will be set to an approximate minimizer.
    F
      Output, scalar.
      F will be set to the objective function value of X at exit.
    CONSTR
      Output, vector.
      CONSTR will be set to the constraint value of X at exit.
    CSTRV
      Output, scalar.
      CSTRV will be set to the constraint violation of X at exit, i.e.,
      max([0, XL - X, X - XU, Aineq*X - Bineq, ABS(Aeq*X - Beq), NLCONSTR(X)]).
    NF
      Output, scalar.
      NF will be set to the number of calls of CALCFC at exit.
    XHIST, FHIST, CHIST, CONHIST, MAXHIST
      XHIST: Output, rank 2 array;
      FHIST: Output, rank 1 array;
      CHIST: Output, rank 1 array;
      CONHIST: Output, rank 2 array;
      MAXHIST: Input, scalar, default: MAXFUN
      XHIST, if present, will output the history of iterates; FHIST, if present, will
      output the history of function values; CHIST, if present, will output the history
      of constraint violations; CONHIST, if present, will output the history of
      constraint values; MAXHIST should be a nonnegative integer, and
      XHIST/FHIST/CHIST/CONHIST will output only the history of the last MAXHIST
      iterations. Therefore, MAXHIST = 0 means XHIST/FHIST/CONHIST/CHIST will output
      nothing, while setting MAXHIST = MAXFUN requests XHIST/FHIST/CHIST/CONHIST to
      output all the history. If XHIST is present, its size at exit will be
      (N, min(NF, MAXHIST)); if FHIST is present, its size at exit will be
      min(NF, MAXHIST); if CHIST is present, its size at exit will be min(NF, MAXHIST);
      if CONHIST is present, its size at exit will be (M, min(NF, MAXHIST)).
      IMPORTANT NOTICE:
      Setting MAXHIST to a large value can be costly in terms of memory for large
      problems.
      MAXHIST will be reset to a smaller value if the memory needed exceeds MAXHISTMEM
      defined in common/consts.py
      Use *HIST with caution!!! (N.B.: the algorithm is NOT designed for large
      problems).
    INFO
      Output, scalar.
      INFO is the exit flag. It will be set to one of the following values defined in
      common/infos.py:
      SMALL_TR_RADIUS: the lower bound for the trust region radius is reached;
      FTARGET_ACHIEVED: the target function value is reached;
      MAXFUN_REACHED: the objective function has been evaluated MAXFUN times;
      MAXTR_REACHED: the trust region iteration has been performed MAXTR times (MAXTR = 2*MAXFUN);
      NAN_INF_X: NaN or Inf occurs in X;
      DAMAGING_ROUNDING: rounding errors are becoming damaging.
      #--------------------------------------------------------------------------#
      The following case(s) should NEVER occur unless there is a bug.
      NAN_INF_F: the objective function returns NaN or +Inf;
      NAN_INF_MODEL: NaN or Inf occurs in the model;
      TRSUBP_FAILED: a trust region step failed to reduce the model
      #--------------------------------------------------------------------------#
    """
    # Local variables
    solver = "COBYLA"
    srname = "COBYLA"
    # Sizes
    # mineq/meq: number of linear inequality/equality constraints;
    # mxl/mxu: number of nontrivial lower/upper bounds (those within +/-BOUNDMAX).
    mineq = len(bineq) if present(bineq) else 0
    meq = len(beq) if present(beq) else 0
    mxl = sum(xl > -BOUNDMAX) if present(xl) else 0
    mxu = sum(xu < BOUNDMAX) if present(xu) else 0
    # Total rows of the wrapped constraint system: nontrivial bounds, each linear
    # equality written as two inequalities, the linear inequalities (the stacking
    # order used by get_lincon below), plus the nonlinear constraints.
    mmm = mxu + mxl + 2*meq + mineq + m_nlcon
    num_vars = len(x)
    # Preconditions
    if DEBUGGING:
        assert m_nlcon >= 0, f'{srname} M_NLCON >= 0'
        assert num_vars >= 1, f'{srname} N >= 1'
        assert present(Aineq) == present(bineq), \
            f'{srname} Aineq and Bineq are both present or both absent'
        if (present(Aineq)):
            assert Aineq.shape == (mineq, num_vars), f'{srname} SIZE(Aineq) == [Mineq, N]'
        assert present(Aeq) == present(beq), \
            f'{srname} Aeq and Beq are both present or both absent'
        if (present(Aeq)):
            assert Aeq.shape == (meq, num_vars), f'{srname} SIZE(Aeq) == [Meq, N]'
        if (present(xl)):
            assert len(xl) == num_vars, f'{srname} SIZE(XL) == N'
        if (present(xu)):
            assert len(xu) == num_vars, f'{srname} SIZE(XU) == N'
        # N.B.: If NLCONSTR0 is present, then F0 must be present, and we assume that
        # F(X0) = F0 even if F0 is NaN; if NLCONSTR0 is absent, then F0 must be either
        # absent or NaN, both of which will be interpreted as F(X0) is not provided.
        if present(nlconstr0):
            assert present(f0), f'{srname} If NLCONSTR0 is present, then F0 is present'
        if present(f0):
            assert np.isnan(f0) or present(nlconstr0), \
                f'{srname} If F0 is present and not NaN, then NLCONSTR0 is present'
    # Exit if the size of NLCONSTR0 is inconsistent with M_NLCON.
    if present(nlconstr0):
        assert np.size(nlconstr0) == m_nlcon
    # Read the inputs.
    # The bound arrays are copied before being normalized so that the caller's
    # arrays are not modified in place; NaN entries and entries beyond
    # +/-BOUNDMAX are clamped to +/-BOUNDMAX, i.e., treated as "no bound".
    if xl is not None:
        xl = copy(xl)
        xl[np.isnan(xl)] = -BOUNDMAX
        xl[xl < -BOUNDMAX] = -BOUNDMAX
    if xu is not None:
        xu = copy(xu)
        xu[np.isnan(xu)] = BOUNDMAX
        xu[xu > BOUNDMAX] = BOUNDMAX
    # Wrap the linear and bound constraints into a single constraint: AMAT@X <= BVEC.
    amat, bvec = get_lincon(Aeq, Aineq, beq, bineq, xl, xu)
    # Create constraint vector: the first MMM - M_NLCON entries are the wrapped
    # linear/bound constraints, the last M_NLCON entries the nonlinear ones.
    constr = np.zeros(mmm)
    # Set [F_LOC, CONSTR_LOC] to [F(X0), CONSTR(X0)] after evaluating the latter if
    # needed. In this way, COBYLB only needs one interface.
    # N.B.: Due to the preconditions above, there are two possibilities for F0 and
    # NLCONSTR0.
    # If NLCONSTR0 is present, then F0 must be present, and we assume that F(X0) = F0
    # even if F0 is NaN.
    # If NLCONSTR0 is absent, then F0 must be either absent or NaN, both of which will
    # be interpreted as F(X0) is not provided and we have to evaluate F(X0) and
    # NLCONSTR(X0) now.
    if (present(f0) and present(nlconstr0) and all(np.isfinite(x))):
        f = moderatef(f0)
        if amat is not None:
            constr[:mmm - m_nlcon] = moderatec(matprod(amat, x) - bvec)
        constr[mmm - m_nlcon:] = moderatec(nlconstr0)
    else:
        x = moderatex(x)
        f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
        constr[:mmm - m_nlcon] = moderatec(constr[:mmm - m_nlcon])
    # N.B.: Do NOT call FMSG, SAVEHIST, or SAVEFILT for the function/constraint evaluation at X0.
    # They will be called during the initialization, which will read the function/constraint at X0.
    # CSTRV is the L-infinity constraint violation: the largest positive entry of
    # CONSTR, or 0 when X0 is feasible.
    cstrv = max(np.append(0, constr))
    # If RHOBEG is present, use it; otherwise, RHOBEG takes the default value for
    # RHOBEG, taking the value of RHOEND into account. Note that RHOEND is considered
    # only if it is present and it is VALID (i.e., finite and positive). The other
    # inputs are read similarly.
    # N.B.: the no-op `rhobeg = rhobeg` branches mirror the structure of the
    # modern-Fortran reference implementation.
    if present(rhobeg):
        rhobeg = rhobeg
    elif present(rhoend) and np.isfinite(rhoend) and rhoend > 0:
        rhobeg = max(10 * rhoend, RHOBEG_DEFAULT)
    else:
        rhobeg = RHOBEG_DEFAULT
    if present(rhoend):
        rhoend = rhoend
    elif rhobeg > 0:
        # Scale the default RHOEND proportionally to the chosen RHOBEG, but keep
        # it within [EPS, RHOEND_DEFAULT].
        rhoend = max(EPS, min(RHOEND_DEFAULT/RHOBEG_DEFAULT * rhobeg, RHOEND_DEFAULT))
    else:
        rhoend = RHOEND_DEFAULT
    maxfun = maxfun if present(maxfun) else MAXFUN_DIM_DEFAULT * num_vars
    # ETA1/ETA2 default each other: an absent ETA1 is derived from a valid ETA2
    # and vice versa, falling back to the package defaults otherwise.
    if present(eta1):
        eta1 = eta1
    elif present(eta2) and 0 < eta2 < 1:
        eta1 = max(EPS, eta2 / 7)
    else:
        eta1 = ETA1_DEFAULT
    if present(eta2):
        eta2 = eta2
    elif 0 < eta1 < 1:
        eta2 = (eta1 + 2) / 3
    else:
        eta2 = ETA2_DEFAULT
    maxhist = (
        maxhist
        if present(maxhist)
        else max(maxfun, num_vars + 2, MAXFUN_DIM_DEFAULT * num_vars)
    )
    # Preprocess the inputs in case some of them are invalid. It does nothing if all
    # inputs are valid.
    (
        iprint,
        maxfun,
        maxhist,
        ftarget,
        rhobeg,
        rhoend,
        npt,  # Unused in COBYLA
        maxfilt,
        ctol,
        cweight,
        eta1,
        eta2,
        gamma1,
        gamma2,
        _x0,  # Unused in COBYLA
    ) = preproc(
        solver,
        num_vars,
        iprint,
        maxfun,
        maxhist,
        ftarget,
        rhobeg,
        rhoend,
        num_constraints=mmm,
        maxfilt=maxfilt,
        ctol=ctol,
        cweight=cweight,
        eta1=eta1,
        eta2=eta2,
        gamma1=gamma1,
        gamma2=gamma2,
        is_constrained=(mmm > 0),
    )
    # Further revise MAXHIST according to MAXHISTMEM, and allocate memory for the history.
    # In MATLAB/Python/Julia/R implementation, we should simply set MAXHIST = MAXFUN and initialize
    # CHIST = NaN(1, MAXFUN), CONHIST = NaN(M, MAXFUN), FHIST = NaN(1, MAXFUN), XHIST = NaN(N, MAXFUN)
    # if they are requested; replace MAXFUN with 0 for the history that is not requested.
    # prehist(maxhist, num_vars, present(xhist), xhist_loc, present(fhist), fhist_loc, &
    # & present(chist), chist_loc, m, present(conhist), conhist_loc)
    # call cobylb, which performs the real calculations
    x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info = cobylb(
        calcfc,
        iprint,
        maxfilt,
        maxfun,
        amat,
        bvec,
        ctol,
        cweight,
        eta1,
        eta2,
        ftarget,
        gamma1,
        gamma2,
        rhobeg,
        rhoend,
        constr,
        f,
        x,
        maxhist,
        callback
    )
    return COBYLAResult(x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info)
def get_lincon(Aeq=None, Aineq=None, beq=None, bineq=None, xl=None, xu=None):
    """
    Wrap the linear and bound constraints into a single system AMAT*X <= BVEC.
    N.B.:
    LINCOA normalizes the linear constraints so that each constraint has a gradient
    of norm 1. However, COBYLA does not do this.
    """
    # Infer the problem dimension from whichever input is available; with no
    # inputs at all there is nothing to wrap.
    if Aeq is not None:
        num_vars = Aeq.shape[1]
    elif Aineq is not None:
        num_vars = Aineq.shape[1]
    elif xl is not None:
        num_vars = len(xl)
    elif xu is not None:
        num_vars = len(xu)
    else:
        return None, None
    # Preconditions
    if DEBUGGING:
        assert Aineq is None or Aineq.shape == (len(bineq), num_vars)
        assert Aeq is None or Aeq.shape == (len(beq), num_vars)
        assert (xl is None or xu is None) or len(xl) == len(xu) == num_vars
    #====================#
    # Calculation starts #
    #====================#
    # Indices of the nontrivial bound constraints (those within +/-BOUNDMAX).
    ixl = np.where(xl > -BOUNDMAX)[0] if xl is not None else None
    ixu = np.where(xu < BOUNDMAX)[0] if xu is not None else None
    # Stack the constraint rows in the fixed order
    #   -X <= -XL,  X <= XU,  -Aeq*X <= -Beq,  Aeq*X <= Beq,  Aineq*X <= Bineq,
    # i.e. each bound becomes one inequality and each equality becomes two.
    # N.B.: this treatment of the equalities is naive; one may choose to
    # eliminate them instead.
    identity = np.eye(num_vars)
    no_rows = np.empty((0, num_vars))
    no_rhs = np.empty(0)
    row_parts = [
        no_rows if ixl is None else -identity[ixl, :],
        no_rows if ixu is None else identity[ixu, :],
        no_rows if Aeq is None else -Aeq,
        no_rows if Aeq is None else Aeq,
        no_rows if Aineq is None else Aineq,
    ]
    rhs_parts = [
        no_rhs if ixl is None else -xl[ixl],
        no_rhs if ixu is None else xu[ixu],
        no_rhs if beq is None else -beq,
        no_rhs if beq is None else beq,
        no_rhs if bineq is None else bineq,
    ]
    stacked_a = np.vstack(row_parts)
    stacked_b = np.hstack(rhs_parts)
    # An all-trivial system is reported as absent rather than as empty arrays.
    amat = stacked_a if stacked_a.shape[0] > 0 else None
    bvec = stacked_b if stacked_b.shape[0] > 0 else None
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert (amat is None and bvec is None) or amat.shape == (len(bvec), num_vars)
    return amat, bvec

View File

@@ -0,0 +1,714 @@
'''
This module performs the major calculations of COBYLA.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
from ..common.checkbreak import checkbreak_con
from ..common.consts import REALMAX, EPS, DEBUGGING, MIN_MAXFILT
from ..common.infos import INFO_DEFAULT, MAXTR_REACHED, DAMAGING_ROUNDING, \
SMALL_TR_RADIUS, CALLBACK_TERMINATE
from ..common.evaluate import evaluate
from ..common.history import savehist
from ..common.linalg import isinv, matprod, inprod, norm, primasum, primapow2
from ..common.message import fmsg, retmsg, rhomsg
from ..common.ratio import redrat
from ..common.redrho import redrho
from ..common.selectx import savefilt, selectx
from .update import updatepole, findpole, updatexfc
from .geometry import setdrop_tr, geostep
from .trustregion import trstlp, trrad
from .initialize import initxfc, initfilt
def cobylb(calcfc, iprint, maxfilt, maxfun, amat, bvec, ctol, cweight, eta1, eta2,
           ftarget, gamma1, gamma2, rhobeg, rhoend, constr, f, x, maxhist, callback):
    '''
    This subroutine performs the actual computations of COBYLA.

    Parameters
    ----------
    calcfc : callable evaluating the objective and nonlinear constraints.
    iprint : print level (|iprint| <= 3).
    maxfilt : capacity of the filter used to select the returned iterate.
    maxfun : maximal number of function evaluations.
    amat, bvec : matrix/vector of the linear constraints; both are None when
        the problem has no linear constraints.
    ctol, cweight : constraint tolerance and constraint weight used when
        selecting the best iterate from the filter.
    eta1, eta2, gamma1, gamma2 : trust-region updating parameters
        (0 <= eta1 <= eta2 < 1, 0 < gamma1 < 1 < gamma2).
    rhobeg, rhoend : initial and final values of the resolution RHO.
    constr, f, x : constraint values, objective value, and variables at the
        starting point.
    maxhist : capacity of the history arrays (0 <= maxhist <= maxfun).
    callback : optional user callback; returning a truthy value terminates
        the iterations early.

    Returns
    -------
    (x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info)
        Best point found, its objective/constraint values and violation, the
        number of function evaluations, the histories, and an exit flag.
    '''
    # Outputs
    xhist = []
    fhist = []
    chist = []
    conhist = []
    # Local variables
    solver = 'COBYLA'
    A = np.zeros((np.size(x), np.size(constr))) # A contains the approximate gradient for the constraints
    distsq = np.zeros(np.size(x) + 1)
    # CPENMIN is the minimum of the penalty parameter CPEN for the L-infinity
    # constraint violation in the merit function. Note that CPENMIN = 0 in Powell's
    # implementation, which allows CPEN to be 0. Here, we take CPENMIN > 0 so that CPEN
    # is always positive. This avoids the situation where PREREM becomes 0 when
    # PREREF = 0 = CPEN. It brings two advantages as follows.
    # 1. If the trust-region subproblem solver works correctly and the trust-region
    #    center is not optimal for the subproblem, then PREREM > 0 is guaranteed. This
    #    is because, in theory, PREREC >= 0 and MAX(PREREC, PREREF) > 0, and the
    #    definition of CPEN in GETCPEN ensures that PREREM > 0.
    # 2. There is no need to revise ACTREM and PREREM when CPEN = 0 and F = FVAL(N+1)
    #    as in lines 312--314 of Powell's cobylb.f code. Powell's code revises ACTREM
    #    to CVAL(N + 1) - CSTRV and PREREM to PREREC in this case, which is crucial for
    #    feasibility problems.
    cpenmin = EPS
    # Sizes
    m_lcon = np.size(bvec) if bvec is not None else 0
    num_constraints = np.size(constr)
    m_nlcon = num_constraints - m_lcon
    num_vars = np.size(x)
    # Preconditions
    if DEBUGGING:
        assert abs(iprint) <= 3
        assert num_constraints >= m_lcon and m_lcon >= 0
        assert num_vars >= 1
        assert maxfun >= num_vars + 2
        assert rhobeg >= rhoend and rhoend > 0
        assert all(np.isfinite(x))
        assert 0 <= eta1 <= eta2 < 1
        assert 0 < gamma1 < 1 < gamma2
        assert 0 <= ctol
        assert 0 <= cweight
        assert 0 <= maxhist <= maxfun
        assert amat is None or np.shape(amat) == (m_lcon, num_vars)
        assert min(MIN_MAXFILT, maxfun) <= maxfilt <= maxfun
    #====================#
    # Calculation starts #
    #====================#
    # Initialize SIM, FVAL, CONMAT, and CVAL, together with the history.
    # After the initialization, SIM[:, NUM_VARS] holds the vertex of the initial
    # simplex with the smallest function value (regardless of the constraint
    # violation), and SIM[:, :NUM_VARS] holds the displacements from the other vertices
    # to SIM[:, NUM_VARS]. FVAL, CONMAT, and CVAL hold the function values, constraint
    # values, and constraint violations on the vertices in the order corresponding to
    # SIM.
    evaluated, conmat, cval, sim, simi, fval, nf, subinfo = initxfc(calcfc, iprint,
        maxfun, constr, amat, bvec, ctol, f, ftarget, rhobeg, x,
        xhist, fhist, chist, conhist, maxhist)
    # Initialize the filter, including xfilt, ffilt, confilt, cfilt, and nfilt.
    # N.B.: The filter is used only when selecting which iterate to return. It does not
    # interfere with the iterations. COBYLA is NOT a filter method but a trust-region
    # method based on an L-infinity merit function. Powell's implementation does not
    # use a filter to select the iterate, possibly returning a suboptimal iterate.
    cfilt = np.zeros(np.minimum(np.maximum(maxfilt, 1), maxfun))
    confilt = np.zeros((np.size(constr), np.size(cfilt)))
    ffilt = np.zeros(np.size(cfilt))
    xfilt = np.zeros((np.size(x), np.size(cfilt)))
    nfilt = initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt,
                     ffilt, xfilt)
    # Check whether to return due to abnormal cases that may occur during the initialization.
    if subinfo != INFO_DEFAULT:
        info = subinfo
        # Return the best calculated values of the variables
        # N.B: Selectx and findpole choose X by different standards, one cannot replace the other
        kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], cweight, ctol)
        x = xfilt[:, kopt]
        f = ffilt[kopt]
        constr = confilt[:, kopt]
        cstrv = cfilt[kopt]
        # print a return message according to IPRINT.
        retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
        # Postconditions
        if DEBUGGING:
            assert nf <= maxfun
            assert np.size(x) == num_vars and not any(np.isnan(x))
            assert not (np.isnan(f) or np.isposinf(f))
            # assert np.size(xhist, 0) == n and np.size(xhist, 1) == maxxhist
            # assert not any(np.isnan(xhist(:, 1:min(nf, maxxhist))))
            # The last calculated X can be Inf (finite + finite can be Inf numerically).
            # assert np.size(fhist) == maxfhist
            # assert not any(np.isnan(fhist(1:min(nf, maxfhist))) or np.isposinf(fhist(1:min(nf, maxfhist))))
            # assert np.size(conhist, 0) == m and np.size(conhist, 1) == maxconhist
            # assert not any(np.isnan(conhist(:, 1:min(nf, maxconhist))) or np.isneginf(conhist(:, 1:min(nf, maxconhist))))
            # assert np.size(chist) == maxchist
            # assert not any(chist(1:min(nf, maxchist)) < 0 or np.isnan(chist(1:min(nf, maxchist))) or np.isposinf(chist(1:min(nf, maxchist))))
            # nhist = minval([nf, maxfhist, maxchist])
            # assert not any(isbetter(fhist(1:nhist), chist(1:nhist), f, cstrv, ctol))
        return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
    # Set some more initial values.
    # We must initialize shortd, ratio, and jdrop_tr because these get defined on
    # branches that are not guaranteed to be executed, but their values are used later.
    # Our initialization of CPEN differs from Powell's in two ways. First, we use the
    # ratio defined in (13) of Powell's COBYLA paper to initialize CPEN. Second, we
    # impose CPEN >= CPENMIN > 0. Powell's code simply initializes CPEN to 0.
    rho = rhobeg
    delta = rhobeg
    cpen = np.maximum(cpenmin, np.minimum(1.0E3, fcratio(conmat, fval))) # Powell's code: CPEN = ZERO
    shortd = False
    ratio = -1
    jdrop_tr = 0
    # If DELTA <= GAMMA3*RHO after an update, we set DELTA to RHO. GAMMA3 must be less
    # than GAMMA2. The reason is as follows. Imagine a very successful step with
    # DNORM = the un-updated DELTA = RHO. The TRRAD will update DELTA to GAMMA2*RHO.
    # If GAMMA3 >= GAMMA2, then DELTA will be reset to RHO, which is not reasonable as
    # D is very successful. See paragraph two of Sec 5.2.5 in T. M. Ragonneau's thesis:
    # "Model-Based Derivative-Free Optimization Methods and Software." According to
    # test on 20230613, for COBYLA, this Powellful updating scheme of DELTA works
    # slightly better than setting directly DELTA = max(NEW_DELTA, RHO).
    gamma3 = np.maximum(1, np.minimum(0.75 * gamma2, 1.5))
    # MAXTR is the maximal number of trust region iterations. Each trust-region
    # iteration takes 1 or 2 function evaluations unless the trust-region step is short
    # or the trust-region subproblem solver fails but the geometry step is not invoked.
    # Thus the following MAXTR is unlikely to be reached.
    maxtr = 10 * maxfun
    info = MAXTR_REACHED
    # Begin the iterative procedure
    # After solving a trust-region subproblem, we use three boolean variables to
    # control the workflow.
    # SHORTD - Is the trust-region trial step too short to invoke # a function
    #          evaluation?
    # IMPROVE_GEO - Will we improve the model after the trust-region iteration? If yes,
    #               a geometry step will be taken, corresponding to the "Branch (Delta)"
    #               in the COBYLA paper.
    # REDUCE_RHO - Will we reduce rho after the trust-region iteration?
    # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
    for tr in range(maxtr):
        # Increase the penalty parameter CPEN, if needed, so that
        # PREREM = PREREF + CPEN * PREREC > 0.
        # This is the first (out of two) update of CPEN, where CPEN increases or
        # remains the same.
        # N.B.: CPEN and the merit function PHI = FVAL + CPEN*CVAL are used in three
        # places only.
        # 1. In FINDPOLE/UPDATEPOLE, deciding the optimal vertex of the current simplex.
        # 2. After the trust-region trial step, calculating the reduction ratio.
        # 3. In GEOSTEP, deciding the direction of the geometry step.
        # They do not appear explicitly in the trust-region subproblem, though the
        # trust-region center (i.e. the current optimal vertex) is defined by them.
        cpen = getcpen(amat, bvec, conmat, cpen, cval, delta, fval, rho, sim, simi)
        # Switch the best vertex of the current simplex to SIM[:, NUM_VARS].
        conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval,
                                                            sim, simi)
        # Check whether to exit due to damaging rounding in UPDATEPOLE.
        if subinfo == DAMAGING_ROUNDING:
            info = subinfo
            break  # Better action to take? Geometry step, or simply continue?
        # Does the interpolation set have adequate geometry? It affects improve_geo and
        # reduce_rho.
        adequate_geo = all(primasum(primapow2(sim[:, :num_vars]), axis=0) <= 4 * primapow2(delta))
        # Calculate the linear approximations to the objective and constraint functions.
        # N.B.: TRSTLP accesses A mostly by columns, so it is more reasonable to save A
        # instead of A^T.
        # Zaikun 2023108: According to a test on 2023108, calculating G and
        # A(:, M_LCON+1:M) by solving the linear systems SIM^T*G = FVAL(1:N)-FVAL(N+1)
        # and SIM^T*A = CONMAT(:, 1:N)-CONMAT(:, N+1) does not seem to improve or worsen
        # the performance of COBYLA in terms of the number of function evaluations. The
        # system was solved by SOLVE in LINALG_MOD based on a QR factorization of SIM
        # (not necessarily a good algorithm). No preconditioning or scaling was used.
        g = matprod((fval[:num_vars] - fval[num_vars]), simi)
        # NOTE(review): when there are no linear constraints, AMAT is presumably None
        # together with BVEC, so M_LCON == 0 and this assigns to an empty slice -- confirm.
        A[:, :m_lcon] = amat.T if amat is not None else amat
        A[:, m_lcon:] = matprod((conmat[m_lcon:, :num_vars] -
                                 np.tile(conmat[m_lcon:, num_vars], (num_vars, 1)).T), simi).T
        # Calculate the trust-region trial step d. Note that d does NOT depend on cpen.
        d = trstlp(A, -conmat[:, num_vars], delta, g)
        dnorm = min(delta, norm(d))
        # Is the trust-region trial step short? N.B.: we compare DNORM with RHO, not
        # DELTA. Powell's code especially defines SHORTD by SHORTD = (DNORM < 0.5 *
        # RHO). In our tests 1/10 seems to work better than 1/2 or 1/4, especially for
        # linearly constrained problems. Note that LINCOA has a slightly more
        # sophisticated way of defining SHORTD, taking into account whether D causes a
        # change to the active set. Should we try the same here?
        shortd = (dnorm <= 0.1 * rho)
        # Predict the change to F (PREREF) and to the constraint violation (PREREC) due
        # to D. We have the following in precise arithmetic. They may fail to hold due
        # to rounding errors.
        # 1. B[:NUM_CONSTRAINTS] = -CONMAT[:, NUM_VARS] and hence
        # np.max(np.append(B[:NUM_CONSTRAINTS] - D@A[:, :NUM_CONSTRAINTS], 0)) is the
        # L-infinity violation of the linearized constraints corresponding to D. When
        # D=0, the violation is np.max(np.append(B[:NUM_CONSTRAINTS], 0)) =
        # CVAL[NUM_VARS]. PREREC is the reduction of this violation achieved by D,
        # which is nonnegative in theory; PREREC = 0 iff B[:NUM_CONSTRAINTS] <= 0, i.e.
        # the trust-region center satisfies the linearized constraints.
        # 2. PREREF may be negative or 0, but it is positive when PREREC = 0 and shortd
        # is False
        # 3. Due to 2, in theory, max(PREREC, PREREF) > 0 if shortd is False.
        preref = -inprod(d, g)  # Can be negative
        prerec = cval[num_vars] - np.max(np.append(0, conmat[:, num_vars] + matprod(d, A)))
        # Evaluate PREREM, which is the predicted reduction in the merit function.
        # In theory, PREREM >= 0 and it is 0 iff CPEN = 0 = PREREF. This may not be true
        # numerically.
        prerem = preref + cpen * prerec
        trfail = not (prerem > 1.0E-6 * min(cpen, 1) * rho)
        if shortd or trfail:
            # Reduce DELTA if D is short or if D fails to render PREREM > 0. The latter
            # can only happen due to rounding errors. This seems quite important for
            # performance
            delta *= 0.1
            if delta <= gamma3 * rho:
                delta = rho  # set delta to rho when it is close to or below
        else:
            # Calculate the next value of the objective and constraint functions.
            # If X is close to one of the points in the interpolation set, then we do
            # not evaluate the objective and constraints at X, assuming them to have
            # the values at the closest point.
            # N.B.: If this happens, do NOT include X into the filter, as F and CONSTR
            # are inaccurate.
            x = sim[:, num_vars] + d
            distsq[num_vars] = primasum(primapow2(x - sim[:, num_vars]))
            distsq[:num_vars] = primasum(primapow2(x.reshape(num_vars, 1) -
                (sim[:, num_vars].reshape(num_vars, 1) + sim[:, :num_vars])), axis=0)
            j = np.argmin(distsq)
            if distsq[j] <= primapow2(1e-4 * rhoend):
                f = fval[j]
                constr = conmat[:, j]
                cstrv = cval[j]
            else:
                # Evaluate the objective and constraints at X, taking care of possible
                # inf/nan values.
                f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
                cstrv = np.max(np.append(0, constr))
                nf += 1
                # Save X, F, CONSTR, CSTRV into the history.
                savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
                # Save X, F, CONSTR, CSTRV into the filter.
                nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f,
                                                               x, nfilt, cfilt, ffilt,
                                                               xfilt, constr, confilt)
                # Print a message about the function/constraint evaluation according to
                # iprint
                fmsg(solver, 'Trust region', iprint, nf, delta, f, x, cstrv, constr)
            # Evaluate ACTREM, which is the actual reduction in the merit function
            actrem = (fval[num_vars] + cpen * cval[num_vars]) - (f + cpen * cstrv)
            # Calculate the reduction ratio by redrat, which hands inf/nan carefully
            ratio = redrat(actrem, prerem, eta1)
            # Update DELTA. After this, DELTA < DNORM may hold.
            # N.B.:
            # 1. Powell's code uses RHO as the trust-region radius and updates it as
            #    follows.
            #    Reduce RHO to GAMMA1*RHO if ADEQUATE_GEO is TRUE and either SHORTD is
            #    TRUE or RATIO < ETA1, and then revise RHO to RHOEND if its new value is
            #    not more than GAMMA3*RHOEND; RHO remains unchanged in all other cases;
            #    in particular, RHO is never increased.
            # 2. Our implementation uses DELTA as the trust-region radius, while using
            #    RHO as a lower bound for DELTA. DELTA is updated in a way that is
            #    typical for trust-region methods, and it is revised to RHO if its new
            #    value is not more than GAMMA3*RHO. RHO reflects the current resolution
            #    of the algorithm; its update is essentially the same as the update of
            #    RHO in Powell's code (see the definition of REDUCE_RHO below). Our
            #    implementation aligns with UOBYQA/NEWUOA/BOBYQA/LINCOA and improves the
            #    performance of COBYLA.
            # 3. The same as Powell's code, we do not reduce RHO unless ADEQUATE_GEO is
            #    TRUE. This is also how Powell updated RHO in
            #    UOBYQA/NEWUOA/BOBYQA/LINCOA. What about we also use ADEQUATE_GEO ==
            #    TRUE as a prerequisite for reducing DELTA? The argument would be that
            #    the bad (small) value of RATIO may be because of a bad geometry (and
            #    hence a bad model) rather than an improperly large DELTA, and it might
            #    be good to try improving the geometry first without reducing DELTA.
            #    However, according to a test on 20230206, it does not improve the
            #    performance if we skip the update of DELTA when ADEQUATE_GEO is FALSE
            #    and RATIO < 0.1. Therefore, we choose to update DELTA without checking
            #    ADEQUATE_GEO.
            delta = trrad(delta, dnorm, eta1, eta2, gamma1, gamma2, ratio)
            if delta <= gamma3*rho:
                delta = rho  # Set delta to rho when it is close to or below.
            # Is the newly generated X better than the current best point?
            ximproved = actrem > 0  # If ACTREM is NaN, then XIMPROVED should and will be False
            # Set JDROP_TR to the index of the vertex to be replaced with X. JDROP_TR = 0 means there
            # is no good point to replace, and X will not be included into the simplex; in this case,
            # the geometry of the simplex likely needs improvement, which will be handled below.
            # NOTE(review): the code below tests "jdrop_tr is None" although this comment speaks of
            # JDROP_TR = 0; setdrop_tr presumably returns None when no vertex should drop -- confirm.
            jdrop_tr = setdrop_tr(ximproved, d, delta, rho, sim, simi)
            # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM[:, JDROP_TR] is replaced with D.
            # UPDATEXFC does nothing if JDROP_TR is None, as the algorithm decides to discard X.
            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_tr, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
            # Check whether to break due to damaging rounding in UPDATEXFC
            if subinfo == DAMAGING_ROUNDING:
                info = subinfo
                break  # Better action to take? Geometry step, or a RESCUE as in BOBYQA?
            # Check whether to break due to maxfun, ftarget, etc.
            subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
            if subinfo != INFO_DEFAULT:
                info = subinfo
                break
        # End of if SHORTD or TRFAIL. The normal trust-region calculation ends.
        # Before the next trust-region iteration, we possibly improve the geometry of the simplex or
        # reduce RHO according to IMPROVE_GEO and REDUCE_RHO. Now we decide these indicators.
        # N.B.: We must ensure that the algorithm does not set IMPROVE_GEO = True at infinitely many
        # consecutive iterations without moving SIM[:, NUM_VARS] or reducing RHO. Otherwise, the algorithm
        # will get stuck in repetitive invocations of GEOSTEP. This is ensured by the following facts:
        # 1. If an iteration sets IMPROVE_GEO to True, it must also reduce DELTA or set DELTA to RHO.
        # 2. If SIM[:, NUM_VARS] and RHO remain unchanged, then ADEQUATE_GEO will become True after at
        # most NUM_VARS invocations of GEOSTEP.
        # BAD_TRSTEP: Is the last trust-region step bad?
        bad_trstep = shortd or trfail or ratio <= 0 or jdrop_tr is None
        # IMPROVE_GEO: Should we take a geometry step to improve the geometry of the interpolation set?
        improve_geo = bad_trstep and not adequate_geo
        # REDUCE_RHO: Should we enhance the resolution by reducing rho?
        reduce_rho = bad_trstep and adequate_geo and max(delta, dnorm) <= rho
        # COBYLA never sets IMPROVE_GEO and REDUCE_RHO to True simultaneously.
        # assert not (IMPROVE_GEO and REDUCE_RHO), 'IMPROVE_GEO or REDUCE_RHO are not both TRUE, COBYLA'
        # If SHORTD or TRFAIL is True, then either IMPROVE_GEO or REDUCE_RHO is True unless ADEQUATE_GEO
        # is True and max(DELTA, DNORM) > RHO.
        # assert not (shortd or trfail) or (improve_geo or reduce_rho or (adequate_geo and max(delta, dnorm) > rho)), \
        #     'If SHORTD or TRFAIL is TRUE, then either IMPROVE_GEO or REDUCE_RHO is TRUE unless ADEQUATE_GEO is TRUE and MAX(DELTA, DNORM) > RHO'
        # Comments on BAD_TRSTEP:
        # 1. Powell's definition of BAD_TRSTEP is as follows. The one used above seems to work better,
        # especially for linearly constrained problems due to the factor TENTH (= ETA1).
        # !bad_trstep = (shortd .or. actrem <= 0 .or. actrem < TENTH * prerem .or. jdrop_tr == 0)
        # Besides, Powell did not check PREREM > 0 in BAD_TRSTEP, which is reasonable to do but has
        # little impact upon the performance.
        # 2. NEWUOA/BOBYQA/LINCOA would define BAD_TRSTEP, IMPROVE_GEO, and REDUCE_RHO as follows. Two
        # different thresholds are used in BAD_TRSTEP. It outperforms Powell's version.
        # !bad_trstep = (shortd .or. trfail .or. ratio <= eta1 .or. jdrop_tr == 0)
        # !improve_geo = bad_trstep .and. .not. adequate_geo
        # !bad_trstep = (shortd .or. trfail .or. ratio <= 0 .or. jdrop_tr == 0)
        # !reduce_rho = bad_trstep .and. adequate_geo .and. max(delta, dnorm) <= rho
        # 3. Theoretically, JDROP_TR > 0 when ACTREM > 0 (guaranteed by RATIO > 0). However, in Powell's
        # implementation, JDROP_TR may be 0 even RATIO > 0 due to NaN. The modernized code has rectified
        # this in the function SETDROP_TR. After this rectification, we can indeed simplify the
        # definition of BAD_TRSTEP by removing the condition JDROP_TR == 0. We retain it for robustness.
        # Comments on REDUCE_RHO:
        # When SHORTD is TRUE, UOBYQA/NEWUOA/BOBYQA/LINCOA all set REDUCE_RHO to TRUE if the recent
        # models are sufficiently accurate according to certain criteria. See the paragraph around (37)
        # in the UOBYQA paper and the discussions about Box 14 in the NEWUOA paper. This strategy is
        # crucial for the performance of the solvers. However, as of 20221111, we have not managed to
        # make it work in COBYLA. As in NEWUOA, we recorded the errors of the recent models, and set
        # REDUCE_RHO to true if they are small (e.g., ALL(ABS(MODERR_REC) <= 0.1 * MAXVAL(ABS(A))*RHO) or
        # ALL(ABS(MODERR_REC) <= RHO**2)) when SHORTD is TRUE. It made little impact on the performance.
        # Since COBYLA never sets IMPROVE_GEO and REDUCE_RHO to TRUE simultaneously, the following
        # two blocks are exchangeable: IF (IMPROVE_GEO) ... END IF and IF (REDUCE_RHO) ... END IF.
        # Improve the geometry of the simplex by removing a point and adding a new one.
        # If the current interpolation set has acceptable geometry, then we skip the geometry step.
        # The code has a small difference from Powell's original code here: If the current geometry
        # is acceptable, then we will continue with a new trust-region iteration; however, at the
        # beginning of the iteration, CPEN may be updated, which may alter the pole point SIM(:, N+1)
        # by UPDATEPOLE; the quality of the interpolation point depends on SIM(:, N + 1), meaning
        # that the same interpolation set may have good or bad geometry with respect to different
        # "poles"; if the geometry turns out bad with the new pole, the original COBYLA code will
        # take a geometry step, but our code here will NOT do it but continue to take a trust-region
        # step. The argument is this: even if the geometry step is not skipped in the first place, the
        # geometry may turn out bad again after the pole is altered due to an update to CPEN; should
        # we take another geometry step in that case? If no, why should we do it here? Indeed, this
        # distinction makes no practical difference for CUTEst problems with at most 100 variables
        # and 5000 constraints, while the algorithm framework is simplified.
        if improve_geo and not all(primasum(primapow2(sim[:, :num_vars]), axis=0) <= 4 * primapow2(delta)):
            # Before the geometry step, updatepole has been called either implicitly by UPDATEXFC or
            # explicitly after CPEN is updated, so that SIM[:, :NUM_VARS] is the optimal vertex.
            # Decide a vertex to drop from the simplex. It will be replaced with SIM[:, NUM_VARS] + D to
            # improve the geometry of the simplex.
            # N.B.:
            # 1. COBYLA never sets JDROP_GEO = num_vars.
            # 2. The following JDROP_GEO comes from UOBYQA/NEWUOA/BOBYQA/LINCOA.
            # 3. In Powell's original algorithm, the geometry of the simplex is considered acceptable
            # iff the distance between any vertex and the pole is at most 2.1*DELTA, and the distance
            # between any vertex and the opposite face of the simplex is at least 0.25*DELTA, as
            # specified in (14) of the COBYLA paper. Correspondingly, JDROP_GEO is set to the index of
            # the vertex with the largest distance to the pole provided that the distance is larger than
            # 2.1*DELTA, or the vertex with the smallest distance to the opposite face of the simplex,
            # in which case the distance must be less than 0.25*DELTA, as the current simplex does not
            # have acceptable geometry (see (15)--(16) of the COBYLA paper). Once JDROP_GEO is set, the
            # algorithm replaces SIM(:, JDROP_GEO) with D specified in (17) of the COBYLA paper, which
            # is orthogonal to the face opposite to SIM(:, JDROP_GEO) and has a length of 0.5*DELTA,
            # intending to improve the geometry of the simplex as per (14).
            # 4. Powell's geometry-improving procedure outlined above has an intrinsic flaw: it may lead
            # to infinite cycling, as was observed in a test on 20240320. In this test, the geometry-
            # improving point introduced in the previous iteration was replaced with the trust-region
            # trial point in the current iteration, which was then replaced with the same geometry-
            # improving point in the next iteration, and so on. In this process, the simplex alternated
            # between two configurations, neither of which had acceptable geometry. Thus RHO was never
            # reduced, leading to infinite cycling. (N.B.: Our implementation uses DELTA as the trust
            # region radius, with RHO being its lower bound. When the infinite cycling occurred in this
            # test, DELTA = RHO and it could not be reduced due to the requirement that DELTA >= RHO.)
            jdrop_geo = np.argmax(primasum(primapow2(sim[:, :num_vars]), axis=0), axis=0)
            # Calculate the geometry step D.
            delbar = delta/2
            d = geostep(jdrop_geo, amat, bvec, conmat, cpen, cval, delbar, fval, simi)
            # Calculate the next value of the objective and constraint functions.
            # If X is close to one of the points in the interpolation set, then we do not evaluate the
            # objective and constraints at X, assuming them to have the values at the closest point.
            # N.B.:
            # 1. If this happens, do NOT include X into the filter, as F and CONSTR are inaccurate.
            # 2. In precise arithmetic, the geometry improving step ensures that the distance between X
            # and any interpolation point is at least DELBAR, yet X may be close to them due to
            # rounding. In an experiment with single precision on 20240317, X = SIM(:, N+1) occurred.
            x = sim[:, num_vars] + d
            distsq[num_vars] = primasum(primapow2(x - sim[:, num_vars]))
            distsq[:num_vars] = primasum(primapow2(x.reshape(num_vars, 1) -
                (sim[:, num_vars].reshape(num_vars, 1) + sim[:, :num_vars])), axis=0)
            j = np.argmin(distsq)
            if distsq[j] <= primapow2(1e-4 * rhoend):
                f = fval[j]
                constr = conmat[:, j]
                cstrv = cval[j]
            else:
                # Evaluate the objective and constraints at X, taking care of possible
                # inf/nan values.
                f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
                cstrv = np.max(np.append(0, constr))
                nf += 1
                # Save X, F, CONSTR, CSTRV into the history.
                savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
                # Save X, F, CONSTR, CSTRV into the filter.
                nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f,
                                                               x, nfilt, cfilt, ffilt,
                                                               xfilt, constr, confilt)
                # Print a message about the function/constraint evaluation according to iprint
                fmsg(solver, 'Geometry', iprint, nf, delta, f, x, cstrv, constr)
            # Update SIM, SIMI, FVAL, CONMAT, and CVAL so that SIM(:, JDROP_GEO) is replaced with D.
            sim, simi, fval, conmat, cval, subinfo = updatexfc(jdrop_geo, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi)
            # Check whether to break due to damaging rounding in UPDATEXFC
            if subinfo == DAMAGING_ROUNDING:
                info = subinfo
                break  # Better action to take? Geometry step, or simply continue?
            # Check whether to break due to maxfun, ftarget, etc.
            subinfo = checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x)
            if subinfo != INFO_DEFAULT:
                info = subinfo
                break
        # end of if improve_geo. The procedure of improving the geometry ends.
        # The calculations with the current RHO are complete. Enhance the resolution of the algorithm
        # by reducing RHO; update DELTA and CPEN at the same time.
        if reduce_rho:
            if rho <= rhoend:
                info = SMALL_TR_RADIUS
                break
            delta = max(0.5 * rho, redrho(rho, rhoend))
            rho = redrho(rho, rhoend)
            # The second (out of two) updates of CPEN, where CPEN decreases or remains the same.
            # Powell's code: cpen = min(cpen, fcratio(fval, conmat)), which may set CPEN to 0.
            cpen = np.maximum(cpenmin, np.minimum(cpen, fcratio(conmat, fval)))
            # Print a message about the reduction of rho according to iprint
            rhomsg(solver, iprint, nf, fval[num_vars], rho, sim[:, num_vars], cval[num_vars], conmat[:, num_vars], cpen)
            conmat, cval, fval, sim, simi, subinfo = updatepole(cpen, conmat, cval, fval, sim, simi)
            # Check whether to break due to damaging rounding detected in updatepole
            if subinfo == DAMAGING_ROUNDING:
                info = subinfo
                break  # Better action to take? Geometry step, or simply continue?
        # End of if reduce_rho. The procedure of reducing RHO ends.
        # Report the current best value, and check if user asks for early termination.
        if callback:
            terminate = callback(sim[:, num_vars], fval[num_vars], nf, tr, cval[num_vars], conmat[:, num_vars])
            if terminate:
                info = CALLBACK_TERMINATE
                break
    # End of for loop. The iterative procedure ends
    # Return from the calculation, after trying the last trust-region step if it has not been tried yet.
    # Ensure that D has not been updated after SHORTD == TRUE occurred, or the code below is incorrect.
    x = sim[:, num_vars] + d
    if (info == SMALL_TR_RADIUS and
            shortd and
            norm(x - sim[:, num_vars]) > 1.0E-3 * rhoend and
            nf < maxfun):
        # Zaikun 20230615: UPDATEXFC or UPDATEPOLE is not called since the last trust-region step. Hence
        # SIM[:, NUM_VARS] remains unchanged. Otherwise SIM[:, NUM_VARS] + D would not make sense.
        f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
        cstrv = np.max(np.append(0, constr))
        nf += 1
        savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
        nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr, confilt)
        # Zaikun 20230512: DELTA has been updated. RHO is only indicative here. TO BE IMPROVED.
        fmsg(solver, 'Trust region', iprint, nf, rho, f, x, cstrv, constr)
    # Return the best calculated values of the variables
    # N.B.: SELECTX and FINDPOLE choose X by different standards, one cannot replace the other.
    kopt = selectx(ffilt[:nfilt], cfilt[:nfilt], max(cpen, cweight), ctol)
    x = xfilt[:, kopt]
    f = ffilt[kopt]
    constr = confilt[:, kopt]
    cstrv = cfilt[kopt]
    # Print a return message according to IPRINT.
    retmsg(solver, info, iprint, nf, f, x, cstrv, constr)
    return x, f, constr, cstrv, nf, xhist, fhist, chist, conhist, info
def getcpen(amat, bvec, conmat, cpen, cval, delta, fval, rho, sim, simi):
    '''
    Get the penalty parameter CPEN so that PREREM = PREREF + CPEN * PREREC > 0.
    See the discussions around equation (9) of the COBYLA paper.

    AMAT/BVEC describe the linear constraints (both None when there are none);
    CONMAT, CVAL, and FVAL hold the constraint values, constraint violations,
    and objective values at the vertices of the simplex SIM, whose inverse is
    SIMI; DELTA is the trust-region radius and RHO its lower bound. The input
    CPEN is the current penalty; the returned CPEN is never smaller than it.
    '''
    # Even after nearly all of the pycutest problems were showing nearly bit for bit
    # identical results between Python and the Fortran bindings, HS102 was still off by
    # more than machine epsilon. It turned out to be due to the fact that getcpen was
    # modifying fval, among others. It just goes to show that even when you're nearly
    # perfect, you can still have non trivial bugs.
    conmat = conmat.copy()
    cval = cval.copy()
    fval = fval.copy()
    sim = sim.copy()
    simi = simi.copy()
    # Record the input CPEN so the postcondition can verify that CPEN never decreases.
    # (The original assertion compared CPEN with itself, which is vacuously true.)
    cpen_in = cpen
    # Intermediate variables
    A = np.zeros((np.size(sim, 0), np.size(conmat, 0)))
    itol = 1
    # Sizes
    m_lcon = np.size(bvec) if bvec is not None else 0
    num_constraints = np.size(conmat, 0)
    num_vars = np.size(sim, 0)
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= 0
        assert num_vars >= 1
        assert cpen > 0
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        assert np.size(cval) == num_vars + 1 and \
            not any(cval < 0 | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert isinv(sim[:, :num_vars], simi, itol)
        assert delta >= rho and rho > 0
    #====================#
    # Calculation starts #
    #====================#
    # Initialize INFO which is needed in the postconditions
    info = INFO_DEFAULT
    # Increase CPEN if necessary to ensure PREREM > 0. Branch back for the next loop
    # if this change alters the optimal vertex of the current simplex.
    # Note the following:
    # 1. In each loop, CPEN is changed only if PREREC > 0 > PREREF, in which case
    # PREREM is guaranteed positive after the update. Note that PREREC >= 0 and
    # max(PREREC, PREREF) > 0 in theory. If this holds numerically as well then CPEN
    # is not changed only if PREREC = 0 or PREREF >= 0, in which case PREREM is
    # currently positive, explaining why CPEN needs no update.
    # 2. Even without an upper bound for the loop counter, the loop can occur at most
    # NUM_VARS+1 times. This is because the update of CPEN does not decrease CPEN,
    # and hence it can make vertex J (J <= NUM_VARS) become the new optimal vertex
    # only if CVAL[J] is less than CVAL[NUM_VARS], which can happen at most NUM_VARS
    # times. See the paragraph below (9) in the COBYLA paper. After the "correct"
    # optimal vertex is found, one more loop is needed to calculate CPEN, and hence
    # the loop can occur at most NUM_VARS+1 times.
    for iter in range(num_vars + 1):
        # Switch the best vertex of the current simplex to SIM[:, NUM_VARS]
        conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim,
                                                         simi)
        # Check whether to exit due to damaging rounding in UPDATEPOLE
        if info == DAMAGING_ROUNDING:
            break
        # Calculate the linear approximations to the objective and constraint functions.
        g = matprod(fval[:num_vars] - fval[num_vars], simi)
        A[:, :m_lcon] = amat.T if amat is not None else amat
        A[:, m_lcon:] = matprod((conmat[m_lcon:, :num_vars] -
                                 np.tile(conmat[m_lcon:, num_vars], (num_vars, 1)).T), simi).T
        # Calculate the trust-region trial step D. Note that D does NOT depend on CPEN.
        d = trstlp(A, -conmat[:, num_vars], delta, g)
        # Predict the change to F (PREREF) and to the constraint violation (PREREC) due
        # to D.
        preref = -inprod(d, g)  # Can be negative
        prerec = cval[num_vars] - np.max(np.append(0, conmat[:, num_vars] + matprod(d, A)))
        # PREREC <= 0 or PREREF >= 0 or either is NaN
        if not (prerec > 0 and preref < 0):
            break
        # Powell's code defines BARMU = -PREREF / PREREC, and CPEN is increased to
        # 2*BARMU if and only if it is currently less than 1.5*BARMU, a very
        # "Powellful" scheme. In our implementation, however, we set CPEN directly to
        # the maximum between its current value and 2*BARMU while handling possible
        # overflow. This simplifies the scheme without worsening the performance of
        # COBYLA.
        cpen = max(cpen, min(-2 * preref / prerec, REALMAX))
        if findpole(cpen, cval, fval) == num_vars:
            break
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        # CPEN must not decrease and must remain positive. (Bug fix: the original
        # asserted "cpen >= cpen", a tautology; compare against the input value.)
        assert cpen >= cpen_in and cpen > 0
        # Check INFO first: if UPDATEPOLE reported damaging rounding on the very first
        # iteration, PREREF and PREREC were never assigned and must not be referenced.
        assert info == DAMAGING_ROUNDING or preref + cpen * prerec > 0 or \
            not (prerec >= 0 and np.maximum(prerec, preref) > 0) or not np.isfinite(preref)
    return cpen
def fcratio(conmat, fval):
    '''
    Calculate the ratio between the "typical change" of F and that of CONSTR.
    See equations (12)-(13) in Section 3 of the COBYLA paper for the definition of this ratio.
    '''
    # Preconditions
    if DEBUGGING:
        assert np.size(fval) >= 1
        assert np.size(conmat, 1) == np.size(fval)
    #====================#
    # Calculation starts #
    #====================#
    # CONMAT holds constraint values column-per-vertex; negate so that larger means more feasible.
    cmin = np.min(-conmat, axis=1)
    cmax = np.max(-conmat, axis=1)
    fmin = min(fval)
    fmax = max(fval)
    # Only rows where CMIN < 0.5*CMAX contribute to the denominator.
    mask = cmin < 0.5 * cmax
    if mask.any() and fmin < fmax:
        # INITIAL=inf makes the reduction well defined; MASK restricts it to the relevant rows.
        denom = np.min(np.maximum(cmax, 0) - cmin, where=mask, initial=np.inf)
        # Powell mentioned an alternative (a maximum over the same mask) in section 4 of his COBYLA
        # paper. According to a test on 20230610, it does not make much difference to the performance.
        ratio = (fmax - fmin) / denom
    else:
        ratio = 0
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert ratio >= 0
    return ratio

View File

@@ -0,0 +1,226 @@
'''
This module contains subroutines concerning the geometry-improving of the interpolation set.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from ..common.consts import DEBUGGING
from ..common.linalg import isinv, matprod, inprod, norm, primasum, primapow2
import numpy as np
def setdrop_tr(ximproved, d, delta, rho, sim, simi):
    '''
    This function finds (the index) of a current interpolation point to be replaced with
    the trust-region trial point. See (19)-(22) of the COBYLA paper.
    Returns JDROP, which is either an index in [0, NUM_VARS] or None (meaning: drop nothing).
    N.B.:
    1. If XIMPROVED == True, then JDROP is not None so that D is included into XPT. Otherwise,
    it is a bug.
    2. COBYLA never sets JDROP = NUM_VARS
    TODO: Check whether it improves the performance if JDROP = NUM_VARS is allowed when
    XIMPROVED is True. Note that UPDATEXFC should be revised accordingly.
    '''
    # Local variables
    itol = 0.1
    # Sizes
    num_vars = np.size(sim, 0)
    # Preconditions
    if DEBUGGING:
        assert num_vars >= 1
        assert np.size(d) == num_vars and all(np.isfinite(d))
        assert delta >= rho and rho > 0
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert isinv(sim[:, :num_vars], simi, itol)
    #====================#
    # Calculation starts #
    #====================#
    # -------------------------------------------------------------------------------------------------- #
    # The following code is Powell's scheme for defining JDROP.
    # -------------------------------------------------------------------------------------------------- #
    # ! JDROP = 0 by default. It cannot be removed, as JDROP may not be set below in some cases (e.g.,
    # ! when XIMPROVED == FALSE, MAXVAL(ABS(SIMID)) <= 1, and MAXVAL(VETA) <= EDGMAX).
    # jdrop = 0
    #
    # ! SIMID(J) is the value of the J-th Lagrange function at D. It is the counterpart of VLAG in UOBYQA
    # ! and DEN in NEWUOA/BOBYQA/LINCOA, but it excludes the value of the (N+1)-th Lagrange function.
    # simid = matprod(simi, d)
    # if (any(abs(simid) > 1) .or. (ximproved .and. any(.not. is_nan(simid)))) then
    #     jdrop = int(maxloc(abs(simid), mask=(.not. is_nan(simid)), dim=1), kind(jdrop))
    #     !!MATLAB: [~, jdrop] = max(simid, [], 'omitnan');
    # end if
    #
    # ! VETA(J) is the distance from the J-th vertex of the simplex to the best vertex, taking the trial
    # ! point SIM(:, N+1) + D into account.
    # if (ximproved) then
    #     veta = sqrt(sum((sim(:, 1:n) - spread(d, dim=2, ncopies=n))**2, dim=1))
    #     !!MATLAB: veta = sqrt(sum((sim(:, 1:n) - d).^2));  % d should be a column! Implicit expansion
    # else
    #     veta = sqrt(sum(sim(:, 1:n)**2, dim=1))
    # end if
    #
    # ! VSIG(J) (J=1, .., N) is the Euclidean distance from vertex J to the opposite face of the simplex.
    # vsig = ONE / sqrt(sum(simi**2, dim=2))
    # sigbar = abs(simid) * vsig
    #
    # ! The following JDROP will overwrite the previous one if its premise holds.
    # mask = (veta > factor_delta * delta .and. (sigbar >= factor_alpha * delta .or. sigbar >= vsig))
    # if (any(mask)) then
    #     jdrop = int(maxloc(veta, mask=mask, dim=1), kind(jdrop))
    #     !!MATLAB: etamax = max(veta(mask)); jdrop = find(mask & ~(veta < etamax), 1, 'first');
    # end if
    #
    # ! Powell's code does not include the following instructions. With Powell's code, if SIMID consists
    # ! of only NaN, then JDROP can be 0 even when XIMPROVED == TRUE (i.e., D reduces the merit function).
    # ! With the following code, JDROP cannot be 0 when XIMPROVED == TRUE, unless VETA is all NaN, which
    # ! should not happen if X0 does not contain NaN, the trust-region/geometry steps never contain NaN,
    # ! and we exit once encountering an iterate containing Inf (due to overflow).
    # if (ximproved .and. jdrop <= 0) then  ! Write JDROP <= 0 instead of JDROP == 0 for robustness.
    #     jdrop = int(maxloc(veta, mask=(.not. is_nan(veta)), dim=1), kind(jdrop))
    #     !!MATLAB: [~, jdrop] = max(veta, [], 'omitnan');
    # end if
    # -------------------------------------------------------------------------------------------------- #
    #  Powell's scheme ends here.
    # -------------------------------------------------------------------------------------------------- #
    # The following definition of JDROP is inspired by SETDROP_TR in UOBYQA/NEWUOA/BOBYQA/LINCOA.
    # It is simpler and works better than Powell's scheme. Note that we allow JDROP to be NUM_VARS+1 if
    # XIMPROVED is True, whereas Powell's code does not.
    # See also (4.1) of Scheinberg-Toint-2010: Self-Correcting Geometry in Model-Based Algorithms for
    # Derivative-Free Unconstrained Optimization, which refers to the strategy here as the "combined
    # distance/poisedness criteria".
    # DISTSQ[j] is the square of the distance from the jth vertex of the simplex to the "best" point so
    # far, taking the trial point SIM[:, NUM_VARS] + D into account.
    distsq = np.zeros(np.size(sim, 1))
    if ximproved:
        distsq[:num_vars] = primasum(primapow2(sim[:, :num_vars] - np.tile(d, (num_vars, 1)).T), axis=0)
        distsq[num_vars] = primasum(d*d)
    else:
        distsq[:num_vars] = primasum(primapow2(sim[:, :num_vars]), axis=0)
        distsq[num_vars] = 0
    weight = np.maximum(1, distsq / primapow2(np.maximum(rho, delta/10)))  # Similar to Powell's NEWUOA code.
    # Other possible definitions of weight. They work almost the same as the one above.
    # weight = distsq  # Similar to Powell's LINCOA code, but WRONG. See comments in LINCOA/geometry.f90.
    # weight = max(1, max(25 * distsq / delta**2))  # Similar to Powell's BOBYQA code, works well.
    # weight = max(1, max(10 * distsq / delta**2))
    # weight = max(1, max(1e2 * distsq / delta**2))
    # weight = max(1, max(distsq / rho**2))  ! Similar to Powell's UOBYQA
    # If 0 <= j < NUM_VARS, SIMID[j] is the value of the jth Lagrange function at D; the value of the
    # (NUM_VARS+1)th Lagrange function is 1 - sum(SIMID). [SIMID, 1 - sum(SIMID)] is the counterpart of
    # VLAG in UOBYQA and DEN in NEWUOA/BOBYQA/LINCOA.
    simid = matprod(simi, d)
    score = weight * abs(np.array([*simid, 1 - primasum(simid)]))
    # If XIMPROVED = False (D does not render a better X), set SCORE[NUM_VARS] = -1 to avoid JDROP = NUM_VARS.
    if not ximproved:
        score[num_vars] = -1
    # score[j] is NaN implies SIMID[j] is NaN, but we want abs(SIMID) to be big. So we
    # exclude such j.
    score[np.isnan(score)] = -1
    jdrop = None
    # The following if statement works a bit better than
    # `if any(score > 1) or (any(score > 0) and ximproved)` from Powell's UOBYQA and
    # NEWUOA code.
    if any(score > 0):  # Powell's BOBYQA and LINCOA code.
        jdrop = np.argmax(score)
    if (ximproved and jdrop is None):
        jdrop = np.argmax(distsq)
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        # JDROP is either None or an index in [0, NUM_VARS]. The None case must be tested first:
        # comparing None with an int (as the previous `jdrop <= num_vars` did) raises a TypeError,
        # and JDROP can indeed be None here when XIMPROVED is False and all scores are nonpositive.
        assert jdrop is None or (0 <= jdrop <= num_vars)
        # JDROP must be set when XIMPROVED = True.
        assert jdrop is not None or not ximproved
    # JDROP is not None when XIMPROVED = TRUE unless NaN occurs in DISTSQ, which should not happen if the
    # starting point does not contain NaN and the trust-region/geometry steps never contain NaN.
    return jdrop
def geostep(jdrop, amat, bvec, conmat, cpen, cval, delbar, fval, simi):
    '''
    This function calculates a geometry step so that the geometry of the interpolation set is improved
    when SIM[:, JDROP] is replaced with SIM[:, NUM_VARS] + D. See (15)--(17) of the COBYLA paper.
    Returns D, a vector of length NUM_VARS with Euclidean norm (approximately) DELBAR.
    '''
    # Sizes
    m_lcon = np.size(bvec, 0) if bvec is not None else 0
    num_constraints = np.size(conmat, 0)
    num_vars = np.size(simi, 0)
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= m_lcon >= 0
        assert num_vars >= 1
        assert delbar > 0
        assert cpen > 0
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not np.any(np.isnan(conmat) | np.isposinf(conmat))
        # N.B.: the comparison must be parenthesized since `|` binds tighter than `<`;
        # `cval < 0 | np.isnan(cval) | ...` would compare CVAL against a 0/1 integer array
        # and fail to detect NaN/+Inf entries of CVAL.
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert 0 <= jdrop < num_vars
    #====================#
    # Calculation starts #
    #====================#
    # SIMI[JDROP, :] is a vector perpendicular to the face of the simplex to the opposite of vertex
    # JDROP. Set D to the vector in this direction and with length DELBAR.
    d = simi[jdrop, :]
    d = delbar * (d / norm(d))
    # The code below chooses the direction of D according to an approximation of the merit function.
    # See (17) of the COBYLA paper and line 225 of Powell's cobylb.f.
    # Calculate the coefficients of the linear approximations to the objective and constraint functions.
    # N.B.: CONMAT and SIMI have been updated after the last trust-region step, but G and A have not.
    # So we cannot pass G and A from outside.
    g = matprod(fval[:num_vars] - fval[num_vars], simi)
    A = np.zeros((num_vars, num_constraints))
    # When AMAT is None there are no linear constraints (M_LCON == 0), so there is nothing to fill in.
    # Guard instead of assigning None into the (empty) slice, which would rely on NumPy to cast None.
    if amat is not None:
        A[:, :m_lcon] = amat.T
    A[:, m_lcon:] = matprod((conmat[m_lcon:, :num_vars] -
                  np.tile(conmat[m_lcon:, num_vars], (num_vars, 1)).T), simi).T
    # CVPD and CVND are the predicted constraint violation of D and -D by the linear models.
    cvpd = np.max(np.append(0, conmat[:, num_vars] + matprod(d, A)))
    cvnd = np.max(np.append(0, conmat[:, num_vars] - matprod(d, A)))
    # Take -D if it gives a smaller value of the approximated merit function.
    if -inprod(d, g) + cpen * cvnd < inprod(d, g) + cpen * cvpd:
        d *= -1
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert np.size(d) == num_vars and all(np.isfinite(d))
        # In theory, ||D|| == DELBAR, which may be false due to rounding, but not too far.
        # It is crucial to ensure that the geometry step is nonzero, which holds in theory.
        assert 0.9 * delbar < np.linalg.norm(d) <= 1.1 * delbar
    return d

View File

@@ -0,0 +1,215 @@
'''
This module contains subroutines for initialization.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from ..common.checkbreak import checkbreak_con
from ..common.consts import DEBUGGING, REALMAX
from ..common.infos import INFO_DEFAULT
from ..common.evaluate import evaluate
from ..common.history import savehist
from ..common.linalg import inv
from ..common.message import fmsg
from ..common.selectx import savefilt
import numpy as np
def initxfc(calcfc, iprint, maxfun, constr0, amat, bvec, ctol, f0, ftarget, rhobeg, x0,
            xhist, fhist, chist, conhist, maxhist):
    '''
    This subroutine does the initialization concerning X, function values, and
    constraints.

    It evaluates the objective and constraints at the NUM_VARS+1 vertices of the
    initial simplex (X0 itself, whose values F0/CONSTR0 are supplied, and
    X0 + RHOBEG*e_j for each coordinate direction j), records the evaluations into
    the history arrays, and builds SIM (the simplex), SIMI (the inverse of its
    first NUM_VARS columns), FVAL, CONMAT, and CVAL.

    Returns (EVALUATED, CONMAT, CVAL, SIM, SIMI, FVAL, NF, INFO), where EVALUATED[j]
    tells whether vertex j was actually evaluated (the loop may stop early), NF is
    the number of evaluations performed, and INFO differs from INFO_DEFAULT iff the
    initialization terminated abnormally (per checkbreak_con).
    '''
    # Local variables
    solver = 'COBYLA'
    srname = "INITIALIZE"
    # Sizes
    # NUM_CONSTRAINTS counts all constraints; the first M_LCON are linear (from AMAT/BVEC),
    # the remaining M_NLCON are nonlinear.
    num_constraints = np.size(constr0)
    m_lcon = np.size(bvec) if bvec is not None else 0
    m_nlcon = num_constraints - m_lcon
    num_vars = np.size(x0)
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= 0, f'M >= 0 {srname}'
        assert num_vars >= 1, f'N >= 1 {srname}'
        assert abs(iprint) <= 3, f'IPRINT is 0, 1, -1, 2, -2, 3, or -3 {srname}'
        # assert conmat.shape == (num_constraints , num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}'
        # assert cval.size == num_vars + 1, f'CVAL.size == N+1 {srname}'
        # assert maxchist * (maxchist - maxhist) == 0, f'CHIST.shape == 0 or MAXHIST {srname}'
        # assert conhist.shape[0] == num_constraints and maxconhist * (maxconhist - maxhist) == 0, 'CONHIST.shape[0] == num_constraints, SIZE(CONHIST, 2) == 0 or MAXHIST {srname)}'
        # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}'
        # assert xhist.shape[0] == num_vars and maxxhist * (maxxhist - maxhist) == 0, 'XHIST.shape[0] == N, SIZE(XHIST, 2) == 0 or MAXHIST {srname)}'
        assert all(np.isfinite(x0)), f'X0 is finite {srname}'
        assert rhobeg > 0, f'RHOBEG > 0 {srname}'
    #====================#
    # Calculation starts #
    #====================#
    # Initialize info to the default value. At return, a value different from this
    # value will indicate an abnormal return
    info = INFO_DEFAULT
    # Initialize the simplex. It will be revised during the initialization.
    # Column j (j < NUM_VARS) holds the offset of vertex j from the best point; the last
    # column holds the best point itself (initially X0).
    sim = np.eye(num_vars, num_vars+1) * rhobeg
    sim[:, num_vars] = x0
    # Initialize the matrix simi. In most cases simi is overwritten, but not always.
    # (If the loop below breaks early, this initial value — the exact inverse of the
    # initial SIM[:, :NUM_VARS] — is what gets returned.)
    simi = np.eye(num_vars) / rhobeg
    # evaluated[j] = True iff the function/constraint of SIM[:, j] has been evaluated.
    evaluated = np.zeros(num_vars+1, dtype=bool)
    # Initialize fval, cval, and conmat to REALMAX so that unevaluated entries are
    # recognizably "infinite" rather than garbage.
    fval = np.zeros(num_vars+1) + REALMAX
    cval = np.zeros(num_vars+1) + REALMAX
    conmat = np.zeros((num_constraints, num_vars+1)) + REALMAX
    for k in range(num_vars + 1):
        x = sim[:, num_vars].copy()
        # We will evaluate F corresponding to SIM(:, J).
        # k == 0 corresponds to X0 itself, whose objective/constraint values were
        # supplied by the caller; k >= 1 perturbs coordinate j = k-1 by RHOBEG.
        if k == 0:
            j = num_vars
            f = f0
            constr = constr0
        else:
            j = k - 1
            x[j] += rhobeg
            f, constr = evaluate(calcfc, x, m_nlcon, amat, bvec)
        # Constraint violation: the largest positive constraint value (0 if all satisfied).
        cstrv = np.max(np.append(0, constr))
        # Print a message about the function/constraint evaluation according to IPRINT.
        fmsg(solver, 'Initialization', iprint, k, rhobeg, f, x, cstrv, constr)
        # Save X, F, CONSTR, CSTRV into the history.
        savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist)
        # Save F, CONSTR, and CSTRV to FVAL, CONMAT, and CVAL respectively.
        evaluated[j] = True
        fval[j] = f
        conmat[:, j] = constr
        cval[j] = cstrv
        # Check whether to exit (e.g., MAXFUN reached, FTARGET achieved, or X contains NaN/Inf).
        subinfo = checkbreak_con(maxfun, k, cstrv, ctol, f, ftarget, x)
        if subinfo != INFO_DEFAULT:
            info = subinfo
            break
        # Exchange the new vertex of the initial simplex with the optimal vertex if necessary.
        # This is the ONLY part that is essentially non-parallel.
        # N.B.: the order matters here — X still holds the just-evaluated point, which becomes
        # the new best vertex SIM[:, NUM_VARS], and the offsets in column j are negated.
        if j < num_vars and fval[j] < fval[num_vars]:
            fval[j], fval[num_vars] = fval[num_vars], fval[j]
            cval[j], cval[num_vars] = cval[num_vars], cval[j]
            conmat[:, [j, num_vars]] = conmat[:, [num_vars, j]]
            sim[:, num_vars] = x
            sim[j, :j+1] = -rhobeg  # SIM[:, :j+1] is lower triangular
    nf = np.count_nonzero(evaluated)
    if evaluated.all():
        # Initialize SIMI to the inverse of SIM[:, :num_vars]
        # (SIM may have been revised by the vertex exchanges above, so the initial
        # SIMI = eye/RHOBEG is no longer valid in general).
        simi = inv(sim[:, :num_vars])
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert nf <= maxfun, f'NF <= MAXFUN {srname}'
        assert evaluated.size == num_vars + 1, f'EVALUATED.size == Num_vars + 1 {srname}'
        # assert chist.size == maxchist, f'CHIST.size == MAXCHIST {srname}'
        # assert conhist.shape== (num_constraints, maxconhist), f'CONHIST.shape == [M, MAXCONHIST] {srname}'
        assert conmat.shape == (num_constraints, num_vars + 1), f'CONMAT.shape = [M, N+1] {srname}'
        assert not (np.isnan(conmat).any() or np.isneginf(conmat).any()), f'CONMAT does not contain NaN/-Inf {srname}'
        assert cval.size == num_vars + 1 and not (any(cval < 0) or any(np.isnan(cval)) or any(np.isposinf(cval))), f'CVAL.shape == Num_vars+1 and CVAL does not contain negative values or NaN/+Inf {srname}'
        # assert fhist.shape == maxfhist, f'FHIST.shape == MAXFHIST {srname}'
        # assert maxfhist * (maxfhist - maxhist) == 0, f'FHIST.shape == 0 or MAXHIST {srname}'
        assert fval.size == num_vars + 1 and not (any(np.isnan(fval)) or any(np.isposinf(fval))), f'FVAL.shape == Num_vars+1 and FVAL is not NaN/+Inf {srname}'
        # assert xhist.shape == (num_vars, maxxhist), f'XHIST.shape == [N, MAXXHIST] {srname}'
        assert sim.shape == (num_vars, num_vars + 1), f'SIM.shape == [N, N+1] {srname}'
        assert np.isfinite(sim).all(), f'SIM is finite {srname}'
        assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0), f'SIM(:, 1:N) has no zero column {srname}'
        assert simi.shape == (num_vars, num_vars), f'SIMI.shape == [N, N] {srname}'
        assert np.isfinite(simi).all(), f'SIMI is finite {srname}'
        assert np.allclose(sim[:, :num_vars] @ simi, np.eye(num_vars), rtol=0.1, atol=0.1) or not all(evaluated), f'SIMI = SIM(:, 1:N)^{-1} {srname}'
    return evaluated, conmat, cval, sim, simi, fval, nf, info
def initfilt(conmat, ctol, cweight, cval, fval, sim, evaluated, cfilt, confilt, ffilt, xfilt):
    '''
    This function initializes the filter (XFILT, etc) that will be used when selecting
    x at the end of the solver. It feeds every evaluated vertex of the initial simplex
    into SAVEFILT and returns NFILT, the number of points currently in the filter.
    N.B.:
    1. Why not initialize the filters using XHIST, etc? Because the history is empty if
    the user chooses not to output it.
    2. We decouple INITXFC and INITFILT so that it is easier to parallelize the former
    if needed.
    '''
    # Sizes
    num_constraints = conmat.shape[0]
    num_vars = sim.shape[0]
    maxfilt = len(ffilt)
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= 0
        assert num_vars >= 1
        assert maxfilt >= 1
        assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
        assert np.size(cfilt) == maxfilt
        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
        assert np.size(ffilt) == maxfilt
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        # N.B.: the comparison must be parenthesized since `|` binds tighter than `<`;
        # `cval < 0 | np.isnan(cval) | ...` would compare CVAL against a 0/1 integer
        # array and fail to detect NaN/+Inf entries of CVAL.
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(np.max(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(evaluated) == num_vars + 1
    #====================#
    # Calculation starts #
    #====================#
    nfilt = 0
    for i in range(num_vars+1):
        if evaluated[i]:
            # The first NUM_VARS columns of SIM store vertices relative to the best point
            # SIM[:, NUM_VARS], so the actual iterate is their sum; the last column is absolute.
            if i < num_vars:
                x = sim[:, i] + sim[:, num_vars]
            else:
                x = sim[:, i]  # i == num_vars, i.e. the last column
            nfilt, cfilt, ffilt, xfilt, confilt = savefilt(cval[i], ctol, cweight, fval[i], x, nfilt, cfilt, ffilt, xfilt, conmat[:, i], confilt)
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert nfilt <= maxfilt
        assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
        assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
        assert np.size(cfilt) == maxfilt
        # Same parenthesization as in the preconditions; without it, NaN/+Inf in CFILT
        # would go undetected.
        assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
        assert not (np.isnan(xfilt[:, :nfilt])).any()
        # The last calculated X can be Inf (finite + finite can be Inf numerically).
        assert np.size(ffilt) == maxfilt
        assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
    return nfilt

View File

@@ -0,0 +1,492 @@
'''
This module provides subroutines concerning the trust-region calculations of COBYLA.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
import numpy.typing as npt
from ..common.consts import DEBUGGING, REALMIN, REALMAX, EPS
from ..common.powalg import qradd_Rdiag, qrexc_Rdiag
from ..common.linalg import isminor, matprod, inprod, lsqr, primasum
def trstlp(A, b, delta, g):
    '''
    Calculate an n-component vector d by a two-stage procedure. Stage 1 sets d to the
    shortest vector that minimizes the greatest violation of the constraints
    A.T @ D <= B, K = 1, 2, 3, ..., M,
    subject to the Euclidean length of d being at most delta. If that length is strictly
    below delta, stage 2 uses the remaining freedom in d to minimize the objective function
    G.T @ D
    subject to no increase in any greatest constraint violation. It is possible, though
    rare, that a degeneracy prevents d from attaining the target length delta.

    During the computation (see trstlp_sub), cviol = max(max(A.T@D - b), 0) is the largest
    constraint violation of the current d, and icon indexes a most violated constraint when
    cviol is positive. nact counts the active constraints, whose indices occupy
    iact[0], ..., iact[nact-1]; the rest of iact holds a permutation of the remaining
    constraint indices.
    N.B.: nact <= min(num_constraints, num_vars). The gradients of the constraints in
    iact[:nact] are linearly independent (see the comments above the instruction that
    deletes a constraint from the active set to make room for the new active constraint
    with index iact[icon]); this also follows from how nact is updated: starting at 0, it
    is incremented only while nact < n. Z is an orthogonal matrix whose first nact columns
    can be viewed as the result of Gram-Schmidt applied to the active constraint gradients;
    zdota[j] is the scalar product of column j of Z with the gradient of the jth active
    constraint. At d, the residuals of the active constraints should be zero, and the
    active constraints carry nonnegative Lagrange multipliers stored at the front of
    vmultc; the remainder of vmultc holds the residuals of the inactive constraints at d,
    ordered consistently with the permutation in iact. All these residuals are
    nonnegative, achieved by the shift cviol that makes the least residual zero.
    N.B.:
    0. In Powell's implementation, the constraints are A.T @ D >= B; our A and B are the
    negatives of his.
    1. The algorithm was NOT documented in the COBYLA paper. A note should be written to
    introduce it!
    2. A major part of the algorithm (see trstlp_sub) maintains and updates the QR
    factorization of A[iact[:nact]], i.e. the gradients of all active (linear)
    constraints: Z is Q and zdota is the diagonal of R. The factorization is updated by
    Givens rotations when an index enters or leaves iact.
    3. There are probably better algorithms available for the trust-region linear
    programming problem.
    '''
    # Sizes: A has one column per constraint and one row per variable.
    num_vars, num_constraints = A.shape
    # Preconditions
    if DEBUGGING:
        assert num_vars >= 1
        assert num_constraints >= 0
        assert np.size(g) == num_vars
        assert np.size(b) == num_constraints
        assert delta > 0
    # Workspace shared by both stages (one extra slot for the objective "constraint").
    vmultc = np.zeros(num_constraints + 1)
    iact = np.zeros(num_constraints + 1, dtype=int)
    nact = 0
    d = np.zeros(num_vars)
    z = np.zeros((num_vars, num_vars))
    # ==================
    # Calculation starts
    # ==================
    # Append the objective gradient as an extra column so that, in stage 2, the objective
    # can be treated as one more constraint.
    A_aug = np.hstack([A, g.reshape((num_vars, 1))])
    b_aug = np.hstack([b, 0])
    # Rescale any column with huge entries; otherwise floating-point exceptions may occur.
    # The trust-region step is invariant under this scaling.
    for j in range(num_constraints + 1):  # A_aug has num_constraints+1 columns
        col_max = max(abs(A_aug[:, j]))
        if col_max > 1e12:
            scaling = max(2 * REALMIN, 1 / col_max)
            A_aug[:, j] *= scaling
            b_aug[j] *= scaling
    # Stage 1: minimize the l-infinity violation of the linearized constraints.
    iact[:num_constraints], nact, d, vmultc[:num_constraints], z = trstlp_sub(
        iact[:num_constraints], nact, 1, A_aug[:, :num_constraints],
        b_aug[:num_constraints], delta, d, vmultc[:num_constraints], z)
    # Stage 2: minimize the linearized objective without increasing the violation.
    iact, nact, d, vmultc, z = trstlp_sub(iact, nact, 2, A_aug, b_aug, delta, d, vmultc, z)
    # ================
    # Calculation ends
    # ================
    # Postconditions
    if DEBUGGING:
        assert all(np.isfinite(d))
        # Rounding may yield ||D|| > DELTA, but ||D|| > 2*DELTA is highly improbable.
        assert np.linalg.norm(d) <= 2 * delta
    return d
def trstlp_sub(iact: npt.NDArray, nact: int, stage, A, b, delta, d, vmultc, z):
'''
This subroutine does the real calculations for trstlp, both stage 1 and stage 2.
Major differences between stage 1 and stage 2:
1. Initialization. Stage 2 inherits the values of some variables from stage 1, so they are
initialized in stage 1 but not in stage 2.
2. cviol. cviol is updated after at iteration in stage 1, while it remains a constant in stage2.
3. sdirn. See the definition of sdirn in the code for details.
4. optnew. The two stages have different objectives, so optnew is updated differently.
5. step. step <= cviol in stage 1.
'''
zdasav = np.zeros(z.shape[1])
vmultd = np.zeros(np.size(vmultc))
zdota = np.zeros(np.size(z, 1))
# Sizes
mcon = np.size(A, 1)
num_vars = np.size(A, 0)
# Preconditions
if DEBUGGING:
assert num_vars >= 1
assert stage == 1 or stage == 2
assert (mcon >= 0 and stage == 1) or (mcon >= 1 and stage == 2)
assert np.size(b) == mcon
assert np.size(iact) == mcon
assert np.size(vmultc) == mcon
assert np.size(d) == num_vars
assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars
assert delta > 0
if stage == 2:
assert all(np.isfinite(d)) and np.linalg.norm(d) <= 2 * delta
assert nact >= 0 and nact <= np.minimum(mcon, num_vars)
assert all(vmultc[:mcon]) >= 0
# N.B.: Stage 1 defines only VMULTC(1:M); VMULTC(M+1) is undefined!
# Initialize according to stage
if stage == 1:
iact = np.linspace(0, mcon-1, mcon, dtype=int)
nact = 0
d = np.zeros(num_vars)
cviol = np.max(np.append(0, -b))
vmultc = cviol + b
z = np.eye(num_vars)
if mcon == 0 or cviol <= 0:
# Check whether a quick return is possible. Make sure the in-outputs have been initialized.
return iact, nact, d, vmultc, z
if all(np.isnan(b)):
return iact, nact, d, vmultc, z
else:
icon = np.nanargmax(-b)
num_constraints = mcon
sdirn = np.zeros(len(d))
else:
if inprod(d, d) >= delta*delta:
# Check whether a quick return is possible.
return iact, nact, d, vmultc, z
iact[mcon-1] = mcon-1
vmultc[mcon-1] = 0
num_constraints = mcon - 1
icon = mcon - 1
# In Powell's code, stage 2 uses the zdota and cviol calculated by stage1. Here we recalculate
# them so that they need not be passed from stage 1 to 2, and hence the coupling is reduced.
cviol = np.max(np.append(0, matprod(d, A[:, :num_constraints]) - b[:num_constraints]))
zdota[:nact] = [inprod(z[:, k], A[:, iact[k]]) for k in range(nact)]
# More initialization
optold = REALMAX
nactold = nact
nfail = 0
# Zaikun 20211011: vmultd is computed from scratch at each iteration, but vmultc is inherited
# Powell's code can encounter infinite cycling, which did happen when testing the following CUTEst
# problems: DANWOODLS, GAUSS1LS, GAUSS2LS, GAUSS3LS, KOEBHELB, TAX13322, TAXR13322. Indeed, in all
# these cases, Inf/NaN appear in d due to extremely large values in A (up to 10^219). To resolve
# this, we set the maximal number of iterations to maxiter, and terminate if Inf/NaN occurs in d.
maxiter = np.minimum(10000, 100*max(num_constraints, num_vars))
for iter in range(maxiter):
if DEBUGGING:
assert all(vmultc >= 0)
if stage == 1:
optnew = cviol
else:
optnew = inprod(d, A[:, mcon-1])
# End the current stage of the calculation if 3 consecutive iterations have either failed to
# reduce the best calculated value of the objective function or to increase the number of active
# constraints since the best value was calculated. This strategy prevents cycling, but there is
# a remote possibility that it will cause premature termination.
if optnew < optold or nact > nactold:
nactold = nact
nfail = 0
else:
nfail += 1
optold = np.minimum(optold, optnew)
if nfail == 3:
break
# If icon exceeds nact, then we add the constraint with index iact[icon] to the active set.
if icon >= nact: # In Python this needs to be >= since Python is 0-indexed (in Fortran we have 1 > 0, in Python we need 0 >= 0)
zdasav[:nact] = zdota[:nact]
nactsav = nact
z, zdota, nact = qradd_Rdiag(A[:, iact[icon]], z, zdota, nact) # May update nact to nact+1
# Indeed it suffices to pass zdota[:min(num_vars, nact+1)] to qradd as follows:
# qradd(A[:, iact[icon]], z, zdota[:min(num_vars, nact+1)], nact)
if nact == nactsav + 1:
# N.B.: It is possible to index arrays using [nact, icon] when nact == icon.
# Zaikun 20211012: Why should vmultc[nact] = 0?
if nact != (icon + 1): # Need to add 1 to Python for 0 indexing
vmultc[[icon, nact-1]] = vmultc[nact-1], 0
iact[[icon, nact-1]] = iact[[nact-1, icon]]
else:
vmultc[nact-1] = 0
else:
# Zaikun 20211011:
# 1. VMULTD is calculated from scratch for the first time (out of 2) in one iteration.
# 2. Note that IACT has not been updated to replace IACT[NACT] with IACT[ICON]. Thus
# A[:, IACT[:NACT]] is the UNUPDATED version before QRADD (note Z[:, :NACT] remains the
# same before and after QRADD). Therefore if we supply ZDOTA to LSQR (as Rdiag) as
# Powell did, we should use the UNUPDATED version, namely ZDASAV.
# vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact])
vmultd[:nact] = lsqr(A[:, iact[:nact]], A[:, iact[icon]], z[:, :nact], zdasav[:nact])
if not any(np.logical_and(vmultd[:nact] > 0, iact[:nact] <= num_constraints)):
# N.B.: This can be triggered by NACT == 0 (among other possibilities)! This is
# important, because NACT will be used as an index in the sequel.
break
# vmultd[NACT+1:mcon] is not used, but we have to initialize it in Fortran, or compilers
# complain about the where construct below (another solution: restrict where to 1:NACT).
vmultd[nact:mcon] = -1 # len(vmultd) == mcon
# Revise the Lagrange multipliers. The revision is not applicable to vmultc[nact:num_constraints].
fracmult = [vmultc[i]/vmultd[i] if vmultd[i] > 0 and iact[i] <= num_constraints else REALMAX for i in range(nact)]
# Only the places with vmultd > 0 and iact <= m is relevant below, if any.
frac = min(fracmult[:nact]) # fracmult[nact:mcon] may contain garbage
vmultc[:nact] = np.maximum(np.zeros(len(vmultc[:nact])), vmultc[:nact] - frac*vmultd[:nact])
# Reorder the active constraints so that the one to be replaced is at the end of the list.
# Exit if the new value of zdota[nact] is not acceptable. Powell's condition for the
# following If: not abs(zdota[nact]) > 0. Note that it is different from
# 'abs(zdota[nact]) <=0)' as zdota[nact] can be NaN.
# N.B.: We cannot arrive here with nact == 0, which should have triggered a break above
if np.isnan(zdota[nact - 1]) or abs(zdota[nact - 1]) <= EPS**2:
break
vmultc[[icon, nact - 1]] = 0, frac # vmultc[[icon, nact]] is valid as icon > nact
iact[[icon, nact - 1]] = iact[[nact - 1, icon]]
# end if nact == nactsav + 1
# In stage 2, ensure that the objective continues to be treated as the last active constraint.
# Zaikun 20211011, 20211111: Is it guaranteed for stage 2 that iact[nact-1] = mcon when
# iact[nact] != mcon??? If not, then how does the following procedure ensure that mcon is
# the last of iact[:nact]?
if stage == 2 and iact[nact - 1] != (mcon - 1):
if nact <= 1:
# We must exit, as nact-2 is used as an index below. Powell's code does not have this.
break
z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], nact - 2) # We pass nact-2 in Python instead of nact-1
# Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
# z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], nact - 1)
iact[[nact-2, nact-1]] = iact[[nact-1, nact-2]]
vmultc[[nact-2, nact-1]] = vmultc[[nact-1, nact-2]]
# Zaikun 20211117: It turns out that the last few lines do not guarantee iact[nact] == num_vars in
# stage 2; the following test cannot be passed. IS THIS A BUG?!
# assert iact[nact] == mcon or stage == 1, 'iact[nact] must == mcon in stage 2'
# Powell's code does not have the following. It avoids subsequent floating points exceptions.
if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2:
break
# Set sdirn to the direction of the next change to the current vector of variables
# Usually during stage 1 the vector sdirn gives a search direction that reduces all the
# active constraint violations by one simultaneously.
if stage == 1:
sdirn -= ((inprod(sdirn, A[:, iact[nact-1]]) + 1)/zdota[nact-1])*z[:, nact-1]
else:
sdirn = -1/zdota[nact-1]*z[:, nact-1]
else: # icon < nact
# Delete the constraint with the index iact[icon] from the active set, which is done by
# reordering iact[icon:nact] into [iact[icon+1:nact], iact[icon]] and then reduce nact to
# nact - 1. In theory, icon > 0.
# assert icon > 0, "icon > 0 is required" # For Python I think this is irrelevant
z, zdota[:nact] = qrexc_Rdiag(A[:, iact[:nact]], z, zdota[:nact], icon) # qrexc does nothing if icon == nact
# Indeed, it suffices to pass Z[:, :nact] to qrexc as follows:
# z[:, :nact], zdota[:nact] = qrexc(A[:, iact[:nact]], z[:, :nact], zdota[:nact], icon)
iact[icon:nact] = [*iact[icon+1:nact], iact[icon]]
vmultc[icon:nact] = [*vmultc[icon+1:nact], vmultc[icon]]
nact -= 1
# Powell's code does not have the following. It avoids subsequent exceptions.
# Zaikun 20221212: In theory, nact > 0 in stage 2, as the objective function should always
# be considered as an "active constraint" --- more precisely, iact[nact] = mcon. However,
# looking at the code, I cannot see why in stage 2 nact must be positive after the reduction
# above. It did happen in stage 1 that nact became 0 after the reduction --- this is
# extremely rare, and it was never observed until 20221212, after almost one year of
# random tests. Maybe nact is theoretically positive even in stage 1?
if stage == 2 and nact < 0:
break # If this case ever occurs, we have to break, as nact is used as an index below.
if nact > 0:
if np.isnan(zdota[nact-1]) or abs(zdota[nact-1]) <= EPS**2:
break
# Set sdirn to the direction of the next change to the current vector of variables.
if stage == 1:
sdirn -= inprod(sdirn, z[:, nact]) * z[:, nact]
# sdirn is orthogonal to z[:, nact+1]
else:
sdirn = -1/zdota[nact-1] * z[:, nact-1]
# end if icon > nact
# Calculate the step to the trust region boundary or take the step that reduces cviol to 0.
# ----------------------------------------------------------------------------------------- #
# The following calculation of step is adopted from NEWUOA/BOBYQA/LINCOA. It seems to improve
# the performance of COBYLA. We also found that removing the precaution about underflows is
# beneficial to the overall performance of COBYLA --- the underflows are harmless anyway.
dd = delta*delta - inprod(d, d)
ss = inprod(sdirn, sdirn)
sd = inprod(sdirn, d)
if dd <= 0 or ss <= EPS * delta*delta or np.isnan(sd):
break
# sqrtd: square root of a discriminant. The max avoids sqrtd < abs(sd) due to underflow
sqrtd = max(np.sqrt(ss*dd + sd*sd), abs(sd), np.sqrt(ss * dd))
if sd > 0:
step = dd / (sqrtd + sd)
else:
step = (sqrtd - sd) / ss
# step < 0 should not happen. Step can be 0 or NaN when, e.g., sd or ss becomes inf
if step <= 0 or not np.isfinite(step):
break
# Powell's approach and comments are as follows.
# -------------------------------------------------- #
# The two statements below that include the factor eps prevent
# some harmless underflows that occurred in a test calculation
# (Zaikun: here, eps is the machine epsilon; Powell's original
# code used 1.0e-6, and Powell's code was written in single
# precision). Further, we skip the step if it could be 0 within
# a reasonable tolerance for computer rounding errors.
# !dd = delta*delta - sum(d**2, mask=(abs(d) >= EPS * delta))
# !ss = inprod(sdirn, sdirn)
# !if (dd <= 0) then
# ! exit
# !end if
# !sd = inprod(sdirn, d)
# !if (abs(sd) >= EPS * sqrt(ss * dd)) then
# ! step = dd / (sqrt(ss * dd + sd*sd) + sd)
# !else
# ! step = dd / (sqrt(ss * dd) + sd)
# !end if
# -------------------------------------------------- #
if stage == 1:
if isminor(cviol, step):
break
step = min(step, cviol)
# Set dnew to the new variables if step is the steplength, and reduce cviol to the corresponding
# maximum residual if stage 1 is being done
dnew = d + step * sdirn
if stage == 1:
cviol = np.max(np.append(0, matprod(dnew, A[:, iact[:nact]]) - b[iact[:nact]]))
# N.B.: cviol will be used when calculating vmultd[nact+1:mcon].
# Zaikun 20211011:
# 1. vmultd is computed from scratch for the second (out of 2) time in one iteration.
# 2. vmultd[:nact] and vmultd[nact:mcon] are calculated separately with no coupling.
# 3. vmultd will be calculated from scratch again in the next iteration.
# Set vmultd to the vmultc vector that would occur if d became dnew. A device is included to
# force vmultd[k] = 0 if deviations from this value can be attributed to computer rounding
# errors. First calculate the new Lagrange multipliers.
vmultd[:nact] = -lsqr(A[:, iact[:nact]], dnew, z[:, :nact], zdota[:nact])
if stage == 2:
vmultd[nact-1] = max(0, vmultd[nact-1]) # This seems never activated.
# Complete vmultd by finding the new constraint residuals. (Powell wrote "Complete vmultc ...")
cvshift = cviol - (matprod(dnew, A[:, iact]) - b[iact]) # Only cvshift[nact+1:mcon] is needed
cvsabs = matprod(abs(dnew), abs(A[:, iact])) + abs(b[iact]) + cviol
cvshift[isminor(cvshift, cvsabs)] = 0
vmultd[nact:mcon] = cvshift[nact:mcon]
# Calculate the fraction of the step from d to dnew that will be taken
fracmult = [vmultc[i]/(vmultc[i] - vmultd[i]) if vmultd[i] < 0 else REALMAX for i in range(len(vmultd))]
# Only the places with vmultd < 0 are relevant below, if any.
icon = np.argmin(np.append(1, fracmult)) - 1
frac = min(np.append(1, fracmult))
# Update d, vmultc, and cviol
dold = d
d = (1 - frac)*d + frac * dnew
vmultc = np.maximum(0, (1 - frac)*vmultc + frac*vmultd)
# Break in the case of inf/nan in d or vmultc.
if not (np.isfinite(primasum(abs(d))) and np.isfinite(primasum(abs(vmultc)))):
d = dold # Should we restore also iact, nact, vmultc, and z?
break
if stage == 1:
# cviol = (1 - frac) * cvold + frac * cviol # Powell's version
# In theory, cviol = np.max(np.append(d@A - b, 0)), yet the
# cviol updated as above can be quite different from this value if A has huge entries (e.g., > 1e20)
cviol = np.max(np.append(0, matprod(d, A) - b))
if icon < 0 or icon >= mcon:
# In Powell's code, the condition is icon == 0. Indeed, icon < 0 cannot hold unless
# fracmult contains only nan, which should not happen; icon >= mcon should never occur.
break
#==================#
# Calculation ends #
#==================#
# Postconditions
if DEBUGGING:
assert np.size(iact) == mcon
assert np.size(vmultc) == mcon
assert all(vmultc >= 0)
assert np.size(d) == num_vars
assert all(np.isfinite(d))
assert np.linalg.norm(d) <= 2 * delta
assert np.size(z, 0) == num_vars and np.size(z, 1) == num_vars
assert nact >= 0 and nact <= np.minimum(mcon, num_vars)
return iact, nact, d, vmultc, z
def trrad(delta_in, dnorm, eta1, eta2, gamma1, gamma2, ratio):
    '''
    Update the trust-region radius according to the reduction RATIO and the step norm DNORM.

    Parameters
    ----------
    delta_in : current trust-region radius, with delta_in >= dnorm > 0.
    dnorm : norm of the latest trial step.
    eta1, eta2 : ratio thresholds, 0 <= eta1 <= eta2 < 1.
    gamma1, gamma2 : shrink/expand factors, 0 < gamma1 < 1 < gamma2.
    ratio : actual-to-predicted reduction ratio; never NaN, because the moderated
        extreme barrier keeps the actual reduction from being NaN.

    Returns
    -------
    delta : the updated (positive) trust-region radius.
    '''
    # Preconditions
    if DEBUGGING:
        assert delta_in >= dnorm > 0
        assert 0 <= eta1 <= eta2 < 1
        assert 0 < gamma1 < 1 < gamma2
        # By the definition of RATIO in ratio.f90, RATIO cannot be NaN unless the
        # actual reduction is NaN, which should NOT happen due to the moderated
        # extreme barrier.
        assert not np.isnan(ratio)
    #====================#
    # Calculation starts #
    #====================#
    if ratio <= eta1:
        # Unsuccessful step: contract based on the step length (Powell's UOBYQA/NEWUOA
        # rule; his COBYLA/LINCOA contracted delta_in instead, and BOBYQA took the
        # smaller of the two).
        delta = gamma1 * dnorm
    else:
        # Keep at least gamma1 * delta_in; a very successful step (ratio > eta2) may
        # additionally expand to gamma2 * dnorm (Powell's NEWUOA/BOBYQA rule;
        # LINCOA further capped the result at sqrt(2) * delta_in).
        target = dnorm if ratio <= eta2 else gamma2 * dnorm
        delta = max(gamma1 * delta_in, target)
    # For noisy problems, never shrinking below delta_in unless the step clearly fails
    # (and expanding by a constant factor otherwise) may work better.
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert delta > 0
    return delta

View File

@@ -0,0 +1,289 @@
'''
This module contains subroutines concerning the update of the interpolation set.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from ..common.consts import DEBUGGING
from ..common.infos import DAMAGING_ROUNDING, INFO_DEFAULT
from ..common.linalg import isinv, matprod, outprod, inprod, inv, primasum
import numpy as np
def updatexfc(jdrop, constr, cpen, cstrv, d, f, conmat, cval, fval, sim, simi):
    '''
    This function revises the simplex by updating the elements of SIM, SIMI, FVAL, CONMAT, and CVAL
    after vertex JDROP is replaced by the point SIM[:, NUM_VARS] + D.

    Parameters
    ----------
    jdrop : index of the vertex to replace; None means "do nothing".
    constr, cstrv, f : constraint values, constraint violation, and objective at the new point.
    cpen : penalty parameter of the merit function PHI = F + CPEN * CSTRV.
    d : step from the pole position SIM[:, NUM_VARS] to the new point.
    conmat, cval, fval, sim, simi : simplex data to revise (SIM and SIMI are updated in place).

    Returns
    -------
    (sim, simi, fval, conmat, cval, info) : the revised simplex data. INFO is
    DAMAGING_ROUNDING when SIMI cannot be kept a good approximate inverse of
    SIM[:, :NUM_VARS]; in that case SIM and SIMI are restored to their input values.
    '''
    # Local variables
    itol = 1
    # Sizes
    num_constraints = np.size(constr)
    num_vars = np.size(sim, 0)
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= 0
        assert num_vars >= 1
        # JDROP may legitimately be None (see below), so guard the range check.
        assert jdrop is None or (jdrop >= 0 and jdrop <= num_vars + 1)
        assert not any(np.isnan(constr) | np.isneginf(constr))
        assert not (np.isnan(cstrv) | np.isposinf(cstrv))
        assert np.size(d) == num_vars and all(np.isfinite(d))
        assert not (np.isnan(f) | np.isposinf(f))
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        # N.B.: the comparison must be parenthesized, as '|' binds tighter than '<' in Python.
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(primasum(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert isinv(sim[:, :num_vars], simi, itol)
    #====================#
    # Calculation starts #
    #====================#
    # Do nothing when JDROP is None. This can only happen after a trust-region step.
    if jdrop is None:  # JDROP is None is impossible if the input is correct.
        # N.B.: the return order must match the final return of this function.
        return sim, simi, fval, conmat, cval, INFO_DEFAULT
    # Save copies of SIM and SIMI: both are modified in place below, so plain aliases
    # (sim_old = sim) could not restore the input values when the update turns out to
    # be damaged by rounding errors.
    sim_old = sim.copy()
    simi_old = simi.copy()
    if jdrop < num_vars:
        # Replace an off-pole vertex; update SIMI with a rank-one (Sherman-Morrison-like) formula.
        sim[:, jdrop] = d
        simi_jdrop = simi[jdrop, :] / inprod(simi[jdrop, :], d)
        simi -= outprod(matprod(simi, d), simi_jdrop)
        simi[jdrop, :] = simi_jdrop
    else:  # jdrop == num_vars
        # Shift the pole position by D; the off-pole columns of SIM are stored relative to the pole.
        sim[:, num_vars] += d
        sim[:, :num_vars] -= np.tile(d, (num_vars, 1)).T
        simid = matprod(simi, d)
        sum_simi = primasum(simi, axis=0)
        simi += outprod(simid, sum_simi / (1 - sum(simid)))
    # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS].
    # Calculate SIMI from scratch if the current one is damaged by rounding errors.
    itol = 1
    erri = np.max(abs(matprod(simi, sim[:, :num_vars]) - np.eye(num_vars)))  # np.max returns NaN if any input is NaN
    if erri > 0.1 * itol or np.isnan(erri):
        simi_test = inv(sim[:, :num_vars])
        erri_test = np.max(abs(matprod(simi_test, sim[:, :num_vars]) - np.eye(num_vars)))
        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
            simi = simi_test
            erri = erri_test
    # If SIMI is satisfactory, then update FVAL, CONMAT, CVAL, and the pole position. Otherwise restore
    # SIM and SIMI, and return with INFO = DAMAGING_ROUNDING.
    if erri <= itol:
        fval[jdrop] = f
        conmat[:, jdrop] = constr
        cval[jdrop] = cstrv
        # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already
        conmat, cval, fval, sim, simi, info = updatepole(cpen, conmat, cval, fval, sim, simi)
    else:
        info = DAMAGING_ROUNDING
        sim = sim_old
        simi = simi_old
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(primasum(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING
    return sim, simi, fval, conmat, cval, info
def findpole(cpen, cval, fval):
    '''
    This subroutine identifies the best vertex of the current simplex with respect to the merit
    function PHI = F + CPEN * CSTRV.

    Parameters
    ----------
    cpen : positive penalty parameter of the merit function.
    cval : constraint violations of the NUM_VARS + 1 vertices (nonnegative).
    fval : objective values of the NUM_VARS + 1 vertices.

    Returns
    -------
    jopt : index of the best vertex. It stays at NUM_VARS (the pole position) unless
    another vertex is strictly better; ties in PHI are broken by the smallest CVAL.
    '''
    # Size
    num_vars = np.size(fval) - 1
    # Preconditions
    if DEBUGGING:
        assert cpen > 0
        # N.B.: the comparison must be parenthesized, as '|' binds tighter than '<' in Python.
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
    #====================#
    # Calculation starts #
    #====================#
    # Identify the optimal vertex of the current simplex, starting from the pole position.
    jopt = num_vars
    phi = fval + cpen * cval
    phimin = min(phi)
    # Essentially jopt = np.argmin(phi). However, we keep jopt = num_vars unless there
    # is a strictly better choice. When there are multiple choices, we choose the jopt
    # with the smallest value of cval.
    if phimin < phi[jopt] or any((cval < cval[jopt]) & (phi <= phi[jopt])):
        # While we could use argmin(phi), there may be two places where phi achieves
        # phimin, and in that case we should choose the one with the smallest cval.
        jopt = np.ma.array(cval, mask=(phi > phimin)).argmin()
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert jopt >= 0 and jopt < num_vars + 1
        assert jopt == num_vars or phi[jopt] < phi[num_vars] or (phi[jopt] <= phi[num_vars] and cval[jopt] < cval[num_vars])
    return jopt
def updatepole(cpen, conmat, cval, fval, sim, simi):
    '''
    Identify the best vertex of the current simplex with respect to the merit function
    PHI = F + CPEN * CSTRV, and switch this vertex to SIM[:, NUM_VARS], which Powell
    called the "pole position" in his comments. CONMAT, CVAL, FVAL, and SIMI are
    updated accordingly.

    Returns (conmat, cval, fval, sim, simi, info), where INFO == DAMAGING_ROUNDING
    signals that SIMI could not be kept a good approximate inverse of SIM[:, :NUM_VARS];
    in that case SIM and SIMI are restored to their input values.
    '''
    # N.B. 1: In precise arithmetic, the following two procedures produce the same results:
    # 1) apply UPDATEPOLE to SIM twice, first with CPEN = CPEN1 and then with CPEN = CPEN2;
    # 2) apply UPDATEPOLE to SIM with CPEN = CPEN2.
    # In finite-precision arithmetic, however, they may produce different results unless CPEN1 = CPEN2.
    #
    # N.B. 2: When JOPT == N+1, the best vertex is already at the pole position, so there is nothing to
    # switch. However, as in Powell's code, the code below will check whether SIMI is good enough to
    # work as the inverse of SIM(:, 1:N) or not. If not, Powell's code would invoke an error return of
    # COBYLB; our implementation, however, will try calculating SIMI from scratch; if the recalculated
    # SIMI is still of poor quality, then UPDATEPOLE will return with INFO = DAMAGING_ROUNDING,
    # informing COBYLB that SIMI is poor due to damaging rounding errors.
    #
    # N.B. 3: UPDATEPOLE should be called when and only when FINDPOLE can potentially return a value
    # other than N+1. The value of FINDPOLE is determined by CPEN, CVAL, and FVAL, the latter two being
    # decided by SIM. Thus UPDATEPOLE should be called after CPEN or SIM changes. COBYLA updates CPEN at
    # only two places: the beginning of each trust-region iteration, and when REDRHO is called;
    # SIM is updated only by UPDATEXFC, which itself calls UPDATEPOLE internally. Therefore, we only
    # need to call UPDATEPOLE after updating CPEN at the beginning of each trust-region iteration and
    # after each invocation of REDRHO.
    # Local variables
    itol = 1
    # Sizes
    num_constraints = conmat.shape[0]
    num_vars = sim.shape[0]
    # Preconditions
    if DEBUGGING:
        assert num_constraints >= 0
        assert num_vars >= 1
        assert cpen > 0
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        # N.B.: the comparison must be parenthesized, as '|' binds tighter than '<' in Python.
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(primasum(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        assert isinv(sim[:, :num_vars], simi, itol)
    #====================#
    # Calculation starts #
    #====================#
    # INFO must be set, as it is an output.
    info = INFO_DEFAULT
    # Identify the optimal vertex of the current simplex.
    jopt = findpole(cpen, cval, fval)
    # Switch the best vertex to the pole position SIM[:, NUM_VARS] if it is not there already and update
    # SIMI. Before the update, save a copy of SIM and SIMI. If the update is unsuccessful due to
    # damaging rounding errors, we restore them and return with INFO = DAMAGING_ROUNDING.
    sim_old = sim.copy()
    simi_old = simi.copy()
    if 0 <= jopt < num_vars:
        # Unless there is a bug in FINDPOLE it is guaranteed that JOPT >= 0.
        # When JOPT == NUM_VARS, there is nothing to switch; in addition SIMI[JOPT, :] would be illegal.
        # N.B.: FVAL, CONMAT, and CVAL are only swapped below, AFTER the quality of SIMI is confirmed.
        sim[:, num_vars] += sim[:, jopt]
        sim_jopt = sim[:, jopt].copy()
        sim[:, jopt] = 0  # Zero column of length num_vars
        sim[:, :num_vars] -= np.tile(sim_jopt, (num_vars, 1)).T
        # The above update is equivalent to multiplying SIM[:, :NUM_VARS] from the right side by a matrix whose
        # JOPT-th row is [-1, -1, ..., -1], while all the other rows are the same as those of the
        # identity matrix. It is easy to check that the inverse of this matrix is itself. Therefore,
        # SIMI should be updated by a multiplication with this matrix (i.e. its inverse) from the left
        # side, as is done in the following line. The JOPT-th row of the updated SIMI is minus the sum
        # of all rows of the original SIMI, whereas all the other rows remain unchanged.
        # NDB 20250114: In testing the cutest problem 'SYNTHES2' between the Python implementation and
        # the Fortran bindings, I saw a difference between a plain for loop summing columns and the
        # np.sum command. The differences were small, on the order of 1e-16, i.e. epsilon.
        # According to numpy documentation, np.sum sometimes uses partial pairwise summation,
        # depending on the memory layout of the array and the axis specified.
        simi[jopt, :] = -primasum(simi, axis=0)
    # Check whether SIMI is a poor approximation to the inverse of SIM[:, :NUM_VARS].
    # Calculate SIMI from scratch if the current one is damaged by rounding errors.
    erri = np.max(abs(matprod(simi, sim[:, :num_vars]) - np.eye(num_vars)))  # np.max returns NaN if any input is NaN
    itol = 1
    if erri > 0.1 * itol or np.isnan(erri):
        simi_test = inv(sim[:, :num_vars])
        erri_test = np.max(abs(matprod(simi_test, sim[:, :num_vars]) - np.eye(num_vars)))
        if erri_test < erri or (np.isnan(erri) and not np.isnan(erri_test)):
            simi = simi_test
            erri = erri_test
    # If SIMI is satisfactory, then update FVAL, CONMAT, and CVAL. Otherwise restore SIM and SIMI, and
    # return with INFO = DAMAGING_ROUNDING.
    if erri <= itol:
        if 0 <= jopt < num_vars:
            fval[[jopt, num_vars]] = fval[[num_vars, jopt]]
            conmat[:, [jopt, num_vars]] = conmat[:, [num_vars, jopt]]
            cval[[jopt, num_vars]] = cval[[num_vars, jopt]]
    else:  # erri > itol or erri is NaN
        info = DAMAGING_ROUNDING
        sim = sim_old
        simi = simi_old
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert findpole(cpen, cval, fval) == num_vars or info == DAMAGING_ROUNDING
        assert np.size(conmat, 0) == num_constraints and np.size(conmat, 1) == num_vars + 1
        assert not (np.isnan(conmat) | np.isneginf(conmat)).any()
        assert np.size(cval) == num_vars + 1 and not any((cval < 0) | np.isnan(cval) | np.isposinf(cval))
        assert np.size(fval) == num_vars + 1 and not any(np.isnan(fval) | np.isposinf(fval))
        assert np.size(sim, 0) == num_vars and np.size(sim, 1) == num_vars + 1
        assert np.isfinite(sim).all()
        assert all(primasum(abs(sim[:, :num_vars]), axis=0) > 0)
        assert np.size(simi, 0) == num_vars and np.size(simi, 1) == num_vars
        assert np.isfinite(simi).all()
        # Do not insist on SIMI = SIM[:, :num_vars]^{-1}, as it may not hold under damaging rounding.
        assert isinv(sim[:, :num_vars], simi, itol) or info == DAMAGING_ROUNDING
    return conmat, cval, fval, sim, simi, info

View File

@@ -0,0 +1,34 @@
import numpy as np
from scipy.optimize import Bounds
def process_bounds(bounds, lenx0):
    '''
    Normalize `bounds` into a pair of float64 vectors (lb, ub) of length `lenx0`.

    `bounds` may be None, an object with `lb`/`ub` attributes (e.g. scipy's Bounds),
    or a list of (lower, upper) tuples where None stands for unbounded. When fewer
    entries than variables are supplied, the missing trailing entries are filled
    with -/+ infinity. Examples for lenx0 == 3:
      [(0, 1), (2, 3), (4, 5)] -> [0, 2, 4], [1, 3, 5]
      [(0, 1), (None, 3)]      -> [0, -inf, -inf], [1, 3, inf]
      [(0, 1), (-np.inf, 3)]   -> [0, -inf, -inf], [1, 3, inf]
    '''
    if bounds is None:
        # Fully unbounded problem.
        return (np.full(lenx0, -np.inf, dtype=np.float64),
                np.full(lenx0, np.inf, dtype=np.float64))
    if isinstance(bounds, Bounds):
        lower = np.array(bounds.lb, dtype=np.float64)
        upper = np.array(bounds.ub, dtype=np.float64)
    else:
        # Otherwise, treat bounds as a list of (lower, upper) tuples, mapping None to +/- inf.
        lower = np.array([-np.inf if b[0] is None else b[0] for b in bounds], dtype=np.float64)
        upper = np.array([np.inf if b[1] is None else b[1] for b in bounds], dtype=np.float64)
    # Pad any missing trailing entries with -/+ infinity.
    lower = np.concatenate((lower, np.full(lenx0 - len(lower), -np.inf)))
    upper = np.concatenate((upper, np.full(lenx0 - len(upper), np.inf)))
    return lower, upper

View File

@@ -0,0 +1,46 @@
import numpy as np
from scipy.optimize import LinearConstraint
def combine_multiple_linear_constraints(constraints):
    '''
    Merge several scipy LinearConstraint objects into a single one by stacking their
    A matrices and bound vectors.

    Scalar lb/ub values (which scipy broadcasts over all rows of A) are expanded to
    one entry per row before stacking, so mixed scalar/vector constraints combine
    correctly. All pieces are gathered first and concatenated once, avoiding the
    quadratic copying of repeated np.concatenate calls.

    Parameters
    ----------
    constraints : non-empty sequence of LinearConstraint, all with the same number
        of columns in A.

    Returns
    -------
    LinearConstraint whose A stacks the inputs' rows in order.
    '''
    A_parts, lb_parts, ub_parts = [], [], []
    for constraint in constraints:
        A_i = np.atleast_2d(constraint.A)
        num_rows = A_i.shape[0]
        A_parts.append(A_i)
        # Broadcast scalar bounds to one entry per row of A, matching scipy's semantics.
        lb_parts.append(np.broadcast_to(np.atleast_1d(constraint.lb), (num_rows,)))
        ub_parts.append(np.broadcast_to(np.atleast_1d(constraint.ub), (num_rows,)))
    return LinearConstraint(np.concatenate(A_parts, axis=0),
                            np.concatenate(lb_parts),
                            np.concatenate(ub_parts))
def separate_LC_into_eq_and_ineq(linear_constraint):
    '''
    Split a linear constraint lb <= A@x <= ub into the equality/inequality form
    expected by the Fortran backend of PRIMA: A_eq@x = b_eq and A_ineq@x <= b_ineq.

    Rows with lb == ub (detected as ub <= lb + 2*eps, the preprocessing having
    guaranteed lb <= ub) become equalities with right-hand side (lb + ub)/2. Every
    other row contributes -A@x <= -lb when lb is finite and A@x <= ub when ub is
    finite. Empty pieces are returned as None.
    '''
    A = linear_constraint.A
    lb = linear_constraint.lb
    ub = linear_constraint.ub
    # Rows whose bounds coincide up to 2*eps are treated as equalities.
    is_eq = ub <= lb + 2 * np.finfo(np.float64).eps
    A_eq = A[is_eq]
    b_eq = (lb[is_eq] + ub[is_eq]) / 2.0
    # Finite lower bounds become -A@x <= -lb; finite upper bounds become A@x <= ub.
    keep_lb = ~is_eq & (lb > -np.inf)
    keep_ub = ~is_eq & (ub < np.inf)
    A_ineq = np.concatenate((-A[keep_lb], A[keep_ub]))
    b_ineq = np.concatenate((-lb[keep_lb], ub[keep_ub]))
    # Cast each piece to float64, mapping empty pieces to None.
    def _finalize(arr):
        return np.array(arr, dtype=np.float64) if len(arr) > 0 else None
    return _finalize(A_eq), _finalize(b_eq), _finalize(A_ineq), _finalize(b_ineq)

View File

@@ -0,0 +1,54 @@
import numpy as np
def transform_constraint_function(nlc):
    '''
    Wrap a nonlinear constraint lb <= fun(x) <= ub into a function g with g(x) <= 0,
    the form expected by the Fortran backend.

    Scalar lower/upper bounds are broadcast to the length of the constraint values.
    A length mismatch between the bounds and the constraint output raises ValueError.
    Rows with an infinite lower (resp. upper) bound contribute nothing to the lower
    (resp. upper) part of the output.
    '''
    def wrapped(x):
        values = np.atleast_1d(np.array(nlc.fun(x), dtype=np.float64))
        num_values = len(values)

        def as_vector(bound):
            # Scalars (anything without len()) are replicated once per constraint value.
            try:
                len(bound)
                return bound
            except TypeError:
                return np.array([bound] * num_values, dtype=np.float64)

        lower = as_vector(nlc.lb)
        upper = as_vector(nlc.ub)
        # Validate that the bounds match the shape of the constraint output.
        if num_values != len(lower):
            raise ValueError("The number of elements in the constraint function's output does not match the number of elements in the lower bound.")
        if num_values != len(upper):
            raise ValueError("The number of elements in the constraint function's output does not match the number of elements in the upper bound.")
        # Finite lower bounds give lb - v <= 0; finite upper bounds give v - ub <= 0.
        from_lower = [lo - v for lo, v in zip(lower, values) if lo > -np.inf]
        from_upper = [v - hi for hi, v in zip(upper, values) if hi < np.inf]
        return np.concatenate((from_lower, from_upper))
    return wrapped
def process_nl_constraints(nlcs):
    '''
    Combine a list of nonlinear constraints (each with fun/lb/ub attributes) into a
    single constraint function g with g(x) <= 0, as expected by the Fortran backend.

    Each constraint is rewritten via transform_constraint_function; the combined
    function concatenates their outputs in the order given.
    '''
    functions = [transform_constraint_function(nlc) for nlc in nlcs]

    def constraint_function(x):
        # A single concatenation avoids the quadratic copying of calling
        # np.concatenate once per constraint; an empty list of constraints
        # still yields an empty float64 vector.
        if not functions:
            return np.empty(0, dtype=np.float64)
        return np.concatenate([fun(x) for fun in functions])
    return constraint_function

View File

@@ -0,0 +1,173 @@
'''
This module provides the _project function that attempts to project the initial guess
onto the feasible set.
Adapted from the corresponding function in the PDFO package (https://www.pdfo.net) by
Tom M. Ragonneau (https://ragonneau.github.io) and Zaikun Zhang (https://www.zhangzk.net).
'''
import numpy as np
from ._linear_constraints import LinearConstraint
from scipy.optimize import OptimizeResult
# All the accepted scalar types; np.generic correspond to all NumPy types.
scalar_types = (int, float, np.generic)
eps = np.finfo(np.float64).eps
def _project(x0, lb, ub, constraints):
"""Projection of the initial guess onto the feasible set.
Parameters
----------
x0: ndarray, shape (n,)
The same as in prepdfo.
lb: ndarray, shape (n,)
The same as in prepdfo.
ub: ndarray, shape (n,)
The same as in prepdfo.
constraints: dict
The general constraints of the problem, defined as a dictionary with
fields:
linear: LinearConstraint
The linear constraints of the problem.
nonlinear: dict
The nonlinear constraints of the problem. When ``_project`` is called, the nonlinear constraints are
None.
Returns
-------
result: OptimizeResult
The result of the projection.
Authors
-------
Tom M. RAGONNEAU (ragonneau.github.io)
and Zaikun ZHANG (www.zhangzk.net)
Dedicated to the late Professor M. J. D. Powell FRS (1936--2015).
"""
invoker = 'prima'
# Validate x0.
if isinstance(x0, scalar_types):
x0_c = [x0]
elif hasattr(x0, '__len__'):
x0_c = x0
else:
raise ValueError('{}: UNEXPECTED ERROR: x0 should be a vector.'.format(invoker))
try:
x0_c = np.asarray(x0_c, dtype=np.float64)
except ValueError:
raise ValueError('{}: UNEXPECTED ERROR: x0 should contain only scalars.'.format(invoker))
if len(x0_c.shape) != 1:
raise ValueError('{}: UNEXPECTED ERROR: x0 should be a vector.'.format(invoker))
lenx0 = x0_c.size
# Validate lb.
if isinstance(lb, scalar_types):
lb_c = [lb]
elif hasattr(lb, '__len__'):
lb_c = lb
else:
raise ValueError('{}: UNEXPECTED ERROR: lb should be a vector.'.format(invoker))
try:
lb_c = np.asarray(lb_c, dtype=np.float64)
except ValueError:
raise ValueError('{}: UNEXPECTED ERROR: lb should contain only scalars.'.format(invoker))
if len(lb_c.shape) != 1 or lb_c.size != lenx0:
raise ValueError('{}: UNEXPECTED ERROR: the size of lb is inconsistent with x0.'.format(invoker))
# Validate ub.
if isinstance(ub, scalar_types):
ub_c = [ub]
elif hasattr(ub, '__len__'):
ub_c = ub
else:
raise ValueError('{}: UNEXPECTED ERROR: ub should be a vector.'.format(invoker))
try:
ub_c = np.asarray(ub_c, dtype=np.float64)
except ValueError:
raise ValueError('{}: UNEXPECTED ERROR: ub should contain only scalars.'.format(invoker))
if len(ub_c.shape) != 1 or ub_c.size != lenx0:
raise ValueError('{}: UNEXPECTED ERROR: the size of ub is inconsistent with x0.'.format(invoker))
# Validate constraints.
if not isinstance(constraints, dict) or not ({'linear', 'nonlinear'} <= set(constraints.keys())) or \
not (isinstance(constraints['linear'], LinearConstraint) or constraints['linear'] is None):
# the nonlinear constraints will not be taken into account in this function and are, therefore, not validated
raise ValueError('{}: UNEXPECTED ERROR: The constraints are ill-defined.'.format(invoker))
max_con = 1e20 # Decide whether an inequality constraint can be ignored
# Project onto the feasible set.
if constraints['linear'] is None:
# Direct projection onto the bound constraints
x_proj = np.nanmin((np.nanmax((x0_c, lb_c), axis=0), ub_c), axis=0)
return OptimizeResult(x=x_proj)
elif all(np.less_equal(np.abs(constraints['linear'].ub - constraints['linear'].lb), eps)) and \
np.max(lb_c) <= -max_con and np.min(ub_c) >= max_con:
# The linear constraints are all equality constraints. The projection can therefore be done by solving the
# least-squares problem: min ||A*x - (b - A*x_0)||.
a = constraints['linear'].A
b = (constraints['linear'].lb + constraints['linear'].ub) / 2
xi, _, _, _ = np.linalg.lstsq(a, b - np.dot(a, x0_c), rcond=None)
# The problem is not bounded. However, if the least-square solver returned values bigger in absolute value
# than max_con, they will be reduced to this bound.
x_proj = np.nanmin((np.nanmax((x0_c + xi, lb_c), axis=0), ub_c), axis=0)
return OptimizeResult(x=x_proj)
if constraints['linear'] is not None:
try:
# Project the initial guess onto the linear constraints via SciPy.
from scipy.optimize import minimize
from scipy.optimize import Bounds as ScipyBounds
from scipy.optimize import LinearConstraint as ScipyLinearConstraint
linear = constraints['linear']
# To be more efficient, SciPy asks to separate the equality and the inequality constraints into two
# different LinearConstraint structures
pc_args_ineq, pc_args_eq = dict(), dict()
pc_args_ineq['A'], pc_args_eq['A'] = np.asarray([[]]), np.asarray([[]])
pc_args_ineq['A'] = pc_args_ineq['A'].reshape(0, linear.A.shape[1])
pc_args_eq['A'] = pc_args_eq['A'].reshape(0, linear.A.shape[1])
pc_args_ineq['lb'], pc_args_eq['lb'] = np.asarray([]), np.asarray([])
pc_args_ineq['ub'], pc_args_eq['ub'] = np.asarray([]), np.asarray([])
for i in range(linear.lb.size):
if linear.lb[i] != linear.ub[i]:
pc_args_ineq['A'] = np.concatenate((pc_args_ineq['A'], linear.A[i:i+1, :]), axis=0)
pc_args_ineq['lb'] = np.r_[pc_args_ineq['lb'], linear.lb[i]]
pc_args_ineq['ub'] = np.r_[pc_args_ineq['ub'], linear.ub[i]]
else:
pc_args_eq['A'] = np.concatenate((pc_args_eq['A'], linear.A[i:i+1, :]), axis=0)
pc_args_eq['lb'] = np.r_[pc_args_eq['lb'], linear.lb[i]]
pc_args_eq['ub'] = np.r_[pc_args_eq['ub'], linear.ub[i]]
if pc_args_ineq['A'].size > 0 and pc_args_ineq['lb'].size > 0 and pc_args_eq['lb'].size > 0:
project_constraints = [ScipyLinearConstraint(**pc_args_ineq), ScipyLinearConstraint(**pc_args_eq)]
elif pc_args_ineq['A'].size > 0 and pc_args_ineq['lb'].size > 0:
project_constraints = ScipyLinearConstraint(**pc_args_ineq)
elif pc_args_eq['A'].size > 0:
project_constraints = ScipyLinearConstraint(**pc_args_eq)
else:
project_constraints = ()
# Perform the actual projection.
ax_ineq = np.dot(pc_args_ineq['A'], x0_c)
ax_eq = np.dot(pc_args_eq['A'], x0_c)
if np.greater(ax_ineq, pc_args_ineq['ub']).any() or np.greater(pc_args_ineq['lb'], ax_ineq).any() or \
np.not_equal(ax_eq, pc_args_eq['lb']).any() or \
np.greater(x0_c, ub_c).any() or np.greater(lb_c, x0_c).any():
return minimize(lambda x: np.dot(x - x0_c, x - x0_c) / 2, x0_c, jac=lambda x: (x - x0_c),
bounds=ScipyBounds(lb_c, ub_c), constraints=project_constraints)
else:
# Do not perform any projection if the initial guess is feasible.
return OptimizeResult(x=x0_c)
except ImportError:
return OptimizeResult(x=x0_c)
return OptimizeResult(x=x0_c)

View File

@@ -0,0 +1,93 @@
'''
This module checks whether to break out of the solver loop.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from .infos import INFO_DEFAULT, NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED
import numpy as np
def checkbreak_unc(maxfun, nf, f, ftarget, x):
    '''
    This module checks whether to break out of the solver loop in the unconstrained case.

    Parameters
    ----------
    maxfun : int -- maximal number of function evaluations.
    nf : int -- number of function evaluations so far.
    f : float -- current objective function value (a scalar).
    ftarget : float -- target objective value; reaching it stops the solver.
    x : ndarray -- current iterate.

    Returns
    -------
    info : int -- exit flag; INFO_DEFAULT when no stopping criterion is met.
    '''
    # Outputs
    info = INFO_DEFAULT
    # Local variables
    srname = "CHECKbreak_UNC"
    # Preconditions
    assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {srname}'
    # X does not contain NaN if the initial X does not contain NaN and the subroutines generating
    # trust-region/geometry steps work properly so that they never produce a step containing NaN/Inf.
    assert not any(np.isnan(x)), f'X does not contain NaN {srname}'
    # With the moderated extreme barrier, F cannot be NaN/+Inf.
    # N.B.: F is a scalar, so it must NOT be wrapped in any() -- doing so raises
    # TypeError because np.isnan(scalar) is not iterable (cf. checkbreak_con,
    # which already uses the scalar forms).
    assert not (np.isnan(f) or np.isposinf(f)), f'F is not NaN/+Inf {srname}'
    #====================#
    # Calculation starts #
    #====================#
    # Although X should not contain NaN unless there is a bug, we include the following for security.
    # X can be Inf, as finite + finite can be Inf numerically.
    if any(np.isnan(x)) or any(np.isinf(x)):
        info = NAN_INF_X
    # Although NAN_INF_F should not happen unless there is a bug, we include the following for security.
    if np.isnan(f) or np.isposinf(f):
        info = NAN_INF_F
    if f <= ftarget:
        info = FTARGET_ACHIEVED
    if nf >= maxfun:
        info = MAXFUN_REACHED
    return info
def checkbreak_con(maxfun, nf, cstrv, ctol, f, ftarget, x):
    '''
    Decide whether the solver loop should terminate in the constrained case.
    Returns the corresponding exit flag; INFO_DEFAULT means "keep iterating".
    '''
    exit_flag = INFO_DEFAULT
    who = "CHECKbreak_CON"
    # Preconditions: the stop codes must be distinguishable from the default flag,
    # X must be NaN-free, and F/CSTRV must be finite thanks to the moderated barrier.
    assert INFO_DEFAULT not in [NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, MAXFUN_REACHED], f'NAN_INF_X, NAN_INF_F, FTARGET_ACHIEVED, and MAXFUN_REACHED differ from INFO_DFT {who}'
    assert not any(np.isnan(x)), f'X does not contain NaN {who}'
    assert not (np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv)), f'F or CSTRV is not NaN/+Inf {who}'
    #====================#
    # Calculation starts #
    #====================#
    # The criteria are checked in increasing priority: a later one overrides an
    # earlier one. NaN/Inf checks are kept for security even though they should
    # not trigger unless there is a bug.
    if any(np.isnan(x)) or any(np.isinf(x)):
        exit_flag = NAN_INF_X
    if np.isnan(f) or np.isposinf(f) or np.isnan(cstrv) or np.isposinf(cstrv):
        exit_flag = NAN_INF_F
    if cstrv <= ctol and f <= ftarget:
        exit_flag = FTARGET_ACHIEVED
    if nf >= maxfun:
        exit_flag = MAXFUN_REACHED
    return exit_flag

View File

@@ -0,0 +1,47 @@
'''
This is a module defining some constants.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
import os
# Enable extra runtime assertions when the PRIMA_DEBUGGING environment variable
# is set to any non-empty value.
DEBUGGING = bool(os.getenv('PRIMA_DEBUGGING'))
REALMIN = np.finfo(float).tiny  # smallest positive normalized double
REALMAX = np.finfo(float).max  # largest finite double
FUNCMAX = 10.0**30  # objective values at/above this are treated as huge by the moderated extreme barrier
CONSTRMAX = FUNCMAX  # same cap for constraint values
EPS = np.finfo(float).eps  # double-precision machine epsilon
# Any bound with an absolute value at least BOUNDMAX is considered as no bound.
BOUNDMAX = REALMAX/4
# Some default values
RHOBEG_DEFAULT = 1  # initial trust-region radius
RHOEND_DEFAULT = 1e-6  # final trust-region radius
FTARGET_DEFAULT = -REALMAX  # target objective value (effectively: no target)
CTOL_DEFAULT = np.sqrt(EPS)  # tolerance on the constraint violation
CWEIGHT_DEFAULT = 1e8  # default value of CWEIGHT (constraint weight)
ETA1_DEFAULT = 0.1  # trust-region updating parameters (defaults);
ETA2_DEFAULT = 0.7  # see the solvers for their exact roles
GAMMA1_DEFAULT = 0.5
GAMMA2_DEFAULT = 2
IPRINT_DEFAULT = 0  # default verbosity: no printing
MAXFUN_DIM_DEFAULT = 500  # presumably scaled by the problem dimension to get MAXFUN -- confirm at call sites
PRIMA_MAX_HIST_MEM_MB = 300  # 1MB > 10^5*REAL64. 100 can be too small.
# Maximal amount of memory (Byte) allowed for XHIST, FHIST, CONHIST, CHIST, and the filters.
MHM = PRIMA_MAX_HIST_MEM_MB * 10**6
# Make sure that MAXHISTMEM does not exceed HUGE(0) to avoid overflow and memory errors.
MAXHISTMEM = min(MHM, np.iinfo(np.int32).max)
# Maximal length of the filter used in constrained solvers.
MIN_MAXFILT = 200  # Should be positive; < 200 is not recommended.
MAXFILT_DEFAULT = 10 * MIN_MAXFILT

View File

@@ -0,0 +1,99 @@
'''
This is a module evaluating the objective/constraint function with Nan/Inf handling.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
from .consts import FUNCMAX, CONSTRMAX, REALMAX, DEBUGGING
from .linalg import matprod, primasum
# This is a module evaluating the objective/constraint function with Nan/Inf handling.
def moderatex(x):
    '''
    This function moderates a decision variable. It replaces NaN by 0 and Inf/-Inf by
    REALMAX/-REALMAX.

    N.B.: a moderated copy is returned and the caller's array is left untouched.
    The previous version assigned into X in place (x[np.isnan(x)] = 0), silently
    mutating a shared argument -- the Fortran reference this is translated from
    is a pure function.
    '''
    # np.where builds a new array, so the input is never modified.
    x = np.where(np.isnan(x), 0, x)
    return np.clip(x, -REALMAX, REALMAX)
def moderatef(f):
    """
    Moderate the objective value of a MINIMIZATION problem: NaN becomes FUNCMAX,
    and the value is then clipped to [-REALMAX, FUNCMAX] (so +Inf and anything
    above FUNCMAX is capped at FUNCMAX).
    """
    if np.isnan(f):
        f = FUNCMAX
    # We may moderate huge negative function values as well (clipping at -FUNCMAX
    # instead of -REALMAX), but we decide not to.
    return np.clip(f, -REALMAX, FUNCMAX)
def moderatec(c):
    """
    This function moderates the constraint value, the constraint demanding this value
    to be NONNEGATIVE. It replaces any value below -CONSTRMAX by -CONSTRMAX, and any
    NaN or value above CONSTRMAX by CONSTRMAX.

    N.B.: a moderated copy is returned. The previous version called
    np.nan_to_num(c, copy=False, ...), which mutated the caller's array in place
    and fails for inputs that cannot be written in place (e.g. scalars or
    read-only arrays).
    """
    c = np.nan_to_num(c, copy=True, nan=CONSTRMAX)
    return np.clip(c, -CONSTRMAX, CONSTRMAX)
def evaluate(calcfc, x, m_nlcon, amat, bvec):
    """
    This function evaluates CALCFC at X, returning the objective function value and the
    constraint value. Nan/Inf are handled by a moderated extreme barrier.

    Parameters
    ----------
    calcfc : callable
        Returns a tuple (f, nonlinear_constraint_values) at a given point.
    x : ndarray
        The point at which to evaluate.
    m_nlcon : int
        Number of nonlinear constraint values returned by CALCFC.
    amat : ndarray or None
        Matrix of the linear constraints; None when there are none.
    bvec : ndarray or None
        Right-hand side of the linear constraints; None when there are none.

    Returns
    -------
    f : float
        Moderated objective value.
    constr : ndarray
        Linear constraint values followed by the moderated nonlinear constraint values.
    """
    # Sizes
    m_lcon = len(bvec) if bvec is not None else 0
    # Preconditions
    if DEBUGGING:
        # X should not contain NaN if the initial X does not contain NaN and the
        # subroutines generating trust-region/geometry steps work properly so that
        # they never produce a step containing NaN/Inf.
        assert not any(np.isnan(x))
    #====================#
    # Calculation starts #
    #====================#
    constr = np.zeros(m_lcon + m_nlcon)
    if amat is not None:
        # Linear part first. NOTE(review): stored as A@x - b; confirm the sign
        # convention against the callers (elsewhere feasibility is CONSTR >= 0).
        constr[:m_lcon] = matprod(x, amat.T) - bvec
    if any(np.isnan(x)):
        # Although this should not happen unless there is a bug, we include this case
        # for robustness. The sum propagates the NaN into F and CONSTR.
        # NOTE(review): this rebinding drops the linear part and leaves CONSTR of
        # length m_nlcon rather than m_lcon + m_nlcon -- confirm intended.
        f = primasum(x)
        constr = np.ones(m_nlcon) * f
    else:
        f, constr[m_lcon:] = calcfc(moderatex(x))
        # Moderated extreme barrier: replace NaN/huge objective or constraint values
        # with a large but finite value. This is naive, and better approaches surely
        # exist.
        f = moderatef(f)
        constr[m_lcon:] = moderatec(constr[m_lcon:])
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        # With X not containing NaN, and with the moderated extreme barrier, F cannot
        # be NaN/+Inf, and CONSTR cannot be NaN/-Inf.
        assert not (np.isnan(f) or np.isposinf(f))
        assert not any(np.isnan(constr) | np.isposinf(constr))
    return f, constr

View File

@@ -0,0 +1,38 @@
'''
This module provides subroutines that handle the X/F/C histories of the solver, taking into
account that MAXHIST may be smaller than NF.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
def savehist(maxhist, x, xhist, f, fhist, cstrv, chist, constr, conhist):
    '''
    Append the latest X/F/CSTRV/CONSTR values to their history lists, keeping at
    most MAXHIST entries by discarding the oldest entry once the lists are full.

    The implementation differs greatly from the Fortran reference, relying on
    Python lists instead of pre-allocated arrays. As in Fortran, performance and
    memory use may eventually warrant pre-allocated NaN-filled arrays plus an
    explicit count of stored entries; not needed for the moment.
    '''
    if len(xhist) >= maxhist:
        # Lists are full: drop the oldest record to make room. This effectively
        # accomplishes what rangehist does in the Fortran implementation.
        del xhist[0]
        del fhist[0]
        del chist[0]
        del conhist[0]
    xhist.append(x)
    fhist.append(f)
    chist.append(cstrv)
    conhist.append(constr)

View File

@@ -0,0 +1,30 @@
'''
This is a module defining exit flags.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
INFO_DEFAULT = 0
SMALL_TR_RADIUS = 0
FTARGET_ACHIEVED = 1
TRSUBP_FAILED = 2
MAXFUN_REACHED = 3
MAXTR_REACHED = 20
NAN_INF_X = -1
NAN_INF_F = -2
NAN_INF_MODEL = -3
NO_SPACE_BETWEEN_BOUNDS = 6
DAMAGING_ROUNDING = 7
ZERO_LINEAR_CONSTRAINT = 8
CALLBACK_TERMINATE = 30
# Stop-codes.
# The following codes are used by ERROR STOP as stop-codes, which should be default integers.
INVALID_INPUT = 100
ASSERTION_FAILS = 101
VALIDATION_FAILS = 102
MEMORY_ALLOCATION_FAILS = 103

View File

@@ -0,0 +1,435 @@
'''
This module provides some basic linear algebra procedures.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
from .consts import DEBUGGING, EPS, REALMAX, REALMIN
from .present import present
# We use naive implementations of matrix multiplication and other routines for two
# reasons:
# 1. When Fortran is compiled in debug mode, and Python is using these routines, we
# can get bit for bit identical results as compared to Fortran. This is helpful
# for comparing the two implementations. It will be particularly helpful when porting
# the other implementations like LINCOA, etc.
# 2. On some problems this algorithm is very sensitive to errors in finite precision
# arithmetic. Switching to naive implementation will slow down the algorithm, but
# may be more stable.
USE_NAIVE_MATH = False
def inprod(x, y):
    '''Inner product of vectors x and y; element-by-element accumulation when
    USE_NAIVE_MATH, for bit-for-bit agreement with the Fortran reference.'''
    if not USE_NAIVE_MATH:
        return np.dot(x, y)
    acc = 0
    for idx, xi in enumerate(x):
        acc += xi * y[idx]
    return acc
def matprod12(x, y):
    '''Vector-matrix product x @ y, computed one column of y at a time via inprod.'''
    ncols = y.shape[1]
    out = np.zeros(ncols)
    for col in range(ncols):
        out[col] = inprod(x, y[:, col])
    return out
def matprod21(x, y):
    '''Matrix-vector product x @ y, accumulated as a sum of scaled columns of x.'''
    out = np.zeros(x.shape[0])
    for col in range(x.shape[1]):
        out += x[:, col] * y[col]
    return out
def matprod22(x, y):
    '''
    Matrix-matrix product x @ y: column j of the result accumulates
    x[:, i] * y[i, j] over the inner dimension i.

    Bug fix: the loop bounds were swapped (the inner index ranged over
    y.shape[1] and the column index over x.shape[1]), which happened to work
    only for square operands; for rectangular x/y the product was wrong or
    raised an IndexError. The bounds now follow the result's columns
    (y.shape[1]) and the inner dimension (x.shape[1]). For square inputs the
    summation order is unchanged, so results are bit-for-bit identical.
    '''
    result = np.zeros((x.shape[0], y.shape[1]))
    for j in range(y.shape[1]):
        for i in range(x.shape[1]):
            result[:, j] += x[:, i] * y[i, j]
    return result
def matprod(x, y):
    '''Product of x and y, dispatching on the operands' dimensionality.
    The naive implementations are used only when USE_NAIVE_MATH is set.'''
    if not USE_NAIVE_MATH:
        return x@y
    dims = (x.ndim, y.ndim)
    if dims == (1, 1):
        return inprod(x, y)
    if dims == (1, 2):
        return matprod12(x, y)
    if dims == (2, 1):
        return matprod21(x, y)
    if dims == (2, 2):
        return matprod22(x, y)
    raise ValueError(f'Invalid shapes for x and y: {x.shape} and {y.shape}')
def outprod(x, y):
    '''Outer product of x and y, filled one column at a time in the naive path.'''
    if not USE_NAIVE_MATH:
        return np.outer(x, y)
    result = np.zeros((len(x), len(y)))
    for col, yc in enumerate(y):
        result[:, col] = x * yc
    return result
def lsqr(A, b, Q, Rdiag):
    '''
    Solve the least-squares problem min ||A@x - b||.

    When USE_NAIVE_MATH is false, Q and Rdiag are ignored and NumPy's lstsq is
    used. The naive path performs a Powell-style back substitution using QR
    data supplied by the caller: Q (orthogonal factor) and Rdiag (diagonal of
    R), zeroing any solution component whose contribution is negligible
    according to ISMINOR.
    '''
    if not USE_NAIVE_MATH:
        return np.linalg.lstsq(A, b, rcond=None)[0]
    m = A.shape[0]
    n = A.shape[1]
    rank = min(m, n)
    x = np.zeros(n)
    y = b.copy()  # running residual; A's processed columns are deflated from it below
    for i in range(rank - 1, -1, -1):
        yq = inprod(y, Q[:, i])
        # yqa bounds the rounding noise in yq; if yq is negligible against it,
        # treat the component as exactly zero rather than dividing by Rdiag[i].
        yqa = inprod(np.abs(y), np.abs(Q[:, i]))
        if isminor(yq, yqa):
            x[i] = 0
        else:
            x[i] = yq / Rdiag[i]
        y = y - x[i] * A[:, i]
    return x
def hypot(x1, x2):
    '''Robust sqrt(x1^2 + x2^2): guards against overflow/underflow and against
    non-finite inputs in the naive path.'''
    if not USE_NAIVE_MATH:
        return np.hypot(x1, x2)
    if not np.isfinite(x1):
        return abs(x1)
    if not np.isfinite(x2):
        return abs(x2)
    small, large = np.sort(np.abs(np.array([x1, x2])))
    if small > np.sqrt(REALMIN) and large < np.sqrt(REALMAX/2.1):
        # Safe range: the direct formula neither overflows nor underflows.
        return np.sqrt(small * small + large * large)
    if large > 0:
        # Scale by the larger magnitude to avoid overflow/underflow.
        ratio = small / large
        return large * np.sqrt(ratio * ratio + 1)
    return 0
def norm(x):
    '''Euclidean norm of x.
    NOTE: the naive path deliberately computes xi*xi rather than xi**2 --
    in Python (unlike Fortran) exponentiation is not always bit-identical to
    multiplication; try x = 5 - 1e-15.'''
    if not USE_NAIVE_MATH:
        return np.linalg.norm(x)
    total = 0
    for xi in x:
        total += xi * xi
    return np.sqrt(total)
def istril(A, tol=0):
    '''True if A is lower triangular up to TOL: the total magnitude strictly
    above the diagonal must be negligible.'''
    absA = abs(A)
    return primasum(absA - np.tril(absA)) <= tol
def istriu(A, tol=0):
    '''True if A is upper triangular up to TOL: the total magnitude strictly
    below the diagonal must be negligible.'''
    absA = abs(A)
    return primasum(absA - np.triu(absA)) <= tol
def inv(A):
    '''
    Inverse of the square matrix A.

    When USE_NAIVE_MATH is false, NumPy's inverse is used. The naive path
    handles three cases: lower triangular, upper triangular, and general
    (via the pivoted QR below).
    '''
    if not USE_NAIVE_MATH:
        return np.linalg.inv(A)
    A = A.copy()
    n = A.shape[0]
    if istril(A):
        # This case is invoked in COBYLA.
        # Invert the upper-triangular transpose column by column, then
        # transpose back.
        R = A.T
        B = np.zeros((n, n))
        for i in range(n):
            B[i, i] = 1 / R[i, i]
            B[:i, i] = -matprod(B[:i, :i], R[:i, i]) / R[i, i]
        return B.T
    elif istriu(A):
        # Upper triangular: same column-wise back substitution, no transpose;
        # falls through to the final `return B`.
        B = np.zeros((n, n))
        for i in range(n):
            B[i, i] = 1 / A[i, i]
            B[:i, i] = -matprod(B[:i, :i], A[:i, i]) / A[i, i]
    else:
        # This is NOT the best algorithm for the inverse, but since the QR subroutine is available ...
        Q, R, P = qr(A)
        R = R.T
        B = np.zeros((n, n))
        for i in range(n - 1, -1, -1):
            B[:, i] = (Q[:, i] - matprod(B[:, i + 1:n], R[i + 1:n, i])) / R[i, i]
        # Undo the column pivoting: InvP is the inverse permutation of P.
        InvP = np.zeros(n, dtype=int)
        InvP[P] = np.linspace(0, n-1, n)
        B = B[:, InvP].T
    return B
def qr(A):
    '''
    QR factorization of A (m x n) with column pivoting, built from Givens
    rotations. Returns (Q, R, P) with Q (m x m) orthogonal, R = T.T, and P the
    column permutation as an index array.
    '''
    m = A.shape[0]
    n = A.shape[1]
    Q = np.eye(m)
    T = A.T  # work on the transpose; rows of T are columns of A
    P = np.linspace(0, n-1, n, dtype=int)
    for j in range(n):
        # Pivot: pick the remaining column with the largest partial squared norm.
        # N.B.: the slice ends n+1/m+1 exceed the array bounds; NumPy clamps
        # them, so this is effectively T[j:, j:].
        k = np.argmax(primasum(primapow2(T[j:n+1, j:m+1]), axis=1), axis=0)
        if k > 0 and k <= n - j - 1:
            # k is relative to row j; shift and swap both the permutation and rows.
            k += j
            P[j], P[k] = P[k], P[j]
            T[[j, k], :] = T[[k, j], :]
        # Zero out entries below the diagonal of column j with Givens rotations,
        # accumulating them into Q.
        for i in range(m-1, j, -1):
            G = planerot(T[j, [j, i]]).T
            T[j, [j, i]] = np.append(hypot(T[j, j], T[j, i]), 0)
            T[j + 1:n + 1, [j, i]] = matprod(T[j + 1:n + 1, [j, i]], G)
            Q[:, [j, i]] = matprod(Q[:, [j, i]], G)
    R = T.T
    return Q, R, P
def primasum(x, axis=None):
    '''
    Sum of x, optionally along axis 0 or 1.

    According to its documentation, np.sum will sometimes do partial pairwise
    summation. For our purposes, when comparing against the Fortran reference,
    we don't want anything fancy: the naive path adds elements up one at a time.

    Robustness fix: an unsupported axis previously fell off the end of the
    if/elif chain and silently returned None; it now raises ValueError.
    '''
    if not USE_NAIVE_MATH:
        return np.sum(x, axis=axis)
    if axis is None:
        if x.ndim == 2:
            # Sum columns first, then sum the column totals.
            return sum(primasum(x, axis=0))
        return sum(x)
    if axis == 0:
        result = np.zeros(x.shape[1])
        for i in range(x.shape[1]):
            result[i] = sum(x[:, i])
        return result
    if axis == 1:
        result = np.zeros(x.shape[0])
        for i in range(x.shape[0]):
            result[i] = sum(x[i, :])
        return result
    raise ValueError(f'Invalid axis {axis}')
def primapow2(x):
    '''
    Square of x, computed as x*x rather than x**2.
    Believe it or not, x**2 is not always bit-for-bit the same as x*x in Python,
    although the two appear identical in Fortran. A quick two-liner (after
    importing numpy) to find an example on your system:
    list(filter(lambda x: x[1], [(x:=np.random.random(), x**2 - x*x != 0) for _ in range(10000)]))
    '''
    return x * x
def planerot(x):
    '''
    As in MATLAB, planerot(x) returns a 2x2 Givens matrix G for x in R2 so that Y=G@x has Y[1] = 0.
    Roughly speaking, G = np.array([[x[0]/R, x[1]/R], [-x[1]/R, x[0]/R]]), where R = np.linalg.norm(x).
    0. We need to take care of the possibilities of R=0, Inf, NaN, and over/underflow.
    1. The G defined above is continuous with respect to X except at 0. Following this definition,
    G = np.array([[np.sign(x[0]), 0], [0, np.sign(x[0])]]) if x[1] == 0,
    G = np.array([[0, np.sign(x[1])], [np.sign(x[1]), 0]]) if x[0] == 0
    Yet some implementations ignore the signs, leading to discontinuity and numerical instability.
    2. Difference from MATLAB: if x contains NaN of consists of only Inf, MATLAB returns a NaN matrix,
    but we return an identity matrix or a matrix of +/-np.sqrt(2). We intend to keep G always orthogonal.

    Returns a 2x2 orthogonal numpy array G; when x is finite and not huge,
    G @ x = [||x||, 0] up to rounding (see the postcondition below).
    '''
    # Preconditions
    if DEBUGGING:
        assert len(x) == 2, "x must be a 2-vector"
    # ==================
    # Calculation starts
    # ==================
    # Define C = X(1) / R and S = X(2) / R with R = HYPOT(X(1), X(2)). Handle Inf/NaN, over/underflow.
    if (any(np.isnan(x))):
        # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal.
        c = 1
        s = 0
    elif (all(np.isinf(x))):
        # In this case, MATLAB sets G to NaN(2, 2). We refrain from doing so to keep G orthogonal.
        c = 1 / np.sqrt(2) * np.sign(x[0])
        s = 1 / np.sqrt(2) * np.sign(x[1])
    elif (abs(x[0]) <= 0 and abs(x[1]) <= 0):  # X(1) == 0 == X(2).
        c = 1
        s = 0
    elif (abs(x[1]) <= EPS * abs(x[0])):
        # N.B.:
        # 0. With <= instead of <, this case covers X(1) == 0 == X(2), which is treated above separately
        # to avoid the confusing SIGN(., 0) (see 1).
        # 1. SIGN(A, 0) = ABS(A) in Fortran but sign(0) = 0 in MATLAB, Python, Julia, and R#
        # 2. Taking SIGN(X(1)) into account ensures the continuity of G with respect to X except at 0.
        c = np.sign(x[0])
        s = 0
    elif (abs(x[0]) <= EPS * abs(x[1])):
        # N.B.: SIGN(A, X) = ABS(A) * sign of X /= A * sign of X # Therefore, it is WRONG to define G
        # as SIGN(RESHAPE([ZERO, -ONE, ONE, ZERO], [2, 2]), X(2)). This mistake was committed on
        # 20211206 and took a whole day to debug! NEVER use SIGN on arrays unless you are really sure.
        c = 0
        s = np.sign(x[1])
    else:
        # Here is the normal case. It implements the Givens rotation in a stable & continuous way as in:
        # Bindel, D., Demmel, J., Kahan, W., and Marques, O. (2002). On computing Givens rotations
        # reliably and efficiently. ACM Transactions on Mathematical Software (TOMS), 28(2), 206-238.
        # N.B.: 1. Modern compilers compute SQRT(REALMIN) and SQRT(REALMAX/2.1) at compilation time.
        # 2. The direct calculation without involving T and U seems to work better; use it if possible.
        if (all(np.logical_and(np.sqrt(REALMIN) < np.abs(x), np.abs(x) < np.sqrt(REALMAX / 2.1)))):
            # Do NOT use HYPOTENUSE here; the best implementation for one may be suboptimal for the other
            r = norm(x)
            c = x[0] / r
            s = x[1] / r
        elif (abs(x[0]) > abs(x[1])):
            # Scale by the dominant component to avoid over/underflow.
            t = x[1] / x[0]
            u = max(1, abs(t), np.sqrt(1 + t*t))  # MAXVAL: precaution against rounding error.
            u *= np.sign(x[0])  ##MATLAB: u = sign(x(1))*sqrt(1 + t**2)
            c = 1 / u
            s = t / u
        else:
            t = x[0] / x[1]
            u = max([1, abs(t), np.sqrt(1 + t*t)])  # MAXVAL: precaution against rounding error.
            u *= np.sign(x[1])  ##MATLAB: u = sign(x(2))*sqrt(1 + t**2)
            c = t / u
            s = 1 / u
    G = np.array([[c, s], [-s, c]])  # MATLAB: G = [c, s; -s, c]
    #====================#
    # Calculation ends   #
    #====================#
    # Postconditions
    if DEBUGGING:
        assert G.shape == (2,2)
        assert np.all(np.isfinite(G))
        assert abs(G[0, 0] - G[1, 1]) + abs(G[0, 1] + G[1, 0]) <= 0
        tol = np.maximum(1.0E-10, np.minimum(1.0E-1, 1.0E6 * EPS))
        assert isorth(G, tol)
        if all(np.logical_and(np.isfinite(x), np.abs(x) < np.sqrt(REALMAX / 2.1))):
            r = np.linalg.norm(x)
            assert max(abs(G@x - [r, 0])) <= max(tol, tol * r), 'G @ X = [||X||, 0]'
    return G
def isminor(x, ref):
    '''
    Test whether x is minor compared to ref, as used by Powell (e.g. in COBYLA).
    In precise arithmetic, isminor(x, ref) is true if and only if x == 0; in
    floating point arithmetic it is true when x is 0 or its nonzero value can be
    attributed to rounding errors according to ref.
    Larger sensitivity makes the test stricter; the value 0.1 is due to Powell.
    Examples:
        isminor(1e-20, 1e300) -> True  (1e-20 added to 1e300 rounds away entirely)
        isminor(1e300, 1e-20) -> False (1e300 dominates 1e-20)
        isminor(3, 4)         -> False (3 added to 4 is not rounded off)
    '''
    sensitivity = 0.1  # Powell's choice
    # If adding sensitivity*|x| to |ref| (once, or twice) does not change the
    # result, then x is mere rounding noise relative to ref.
    bumped_once = abs(ref) + sensitivity * abs(x)
    bumped_twice = abs(ref) + 2 * sensitivity * abs(x)
    return np.logical_or(abs(ref) >= bumped_once, bumped_once >= bumped_twice)
def isinv(A, B, tol=None):
    '''
    This procedure tests whether A = B^{-1} up to the tolerance TOL (a default
    tolerance derived from EPS is used when TOL is omitted).

    Bug fix: the first test previously computed (abs(A@B) - I <= tol) instead of
    (abs(A@B - I) <= tol) -- the misplaced parenthesis made it accept any pair
    whose product has tiny entries everywhere (e.g. A = B = 0, since -I <= tol).
    Both products are now tested with the correctly placed absolute value.
    '''
    # Sizes
    n = np.size(A, 0)
    # Preconditions
    if DEBUGGING:
        assert np.size(A, 0) == np.size(A, 1)
        assert np.size(B, 0) == np.size(B, 1)
        assert np.size(A, 0) == np.size(B, 0)
        if present(tol):
            assert tol >= 0
    #====================#
    # Calculation starts #
    #====================#
    tol = tol if present(tol) else np.minimum(1e-3, 1e2 * EPS * np.maximum(np.size(A, 0), np.size(A, 1)))
    tol = np.max([tol, tol * np.max(abs(A)), tol * np.max(abs(B))])
    # A and B are mutual inverses if either product is close enough to the identity.
    is_inv = (abs(matprod(A, B) - np.eye(n)) <= tol).all() or (abs(matprod(B, A) - np.eye(n)) <= tol).all()
    #===================#
    # Calculation ends  #
    #===================#
    return is_inv
def isorth(A, tol=None):
    '''
    Test whether the columns of A are orthonormal: exactly when TOL is omitted,
    or up to the tolerance TOL when it is provided.
    '''
    # Preconditions
    if DEBUGGING:
        if present(tol):
            assert tol >= 0
    #====================#
    # Calculation starts #
    #====================#
    num_vars = np.size(A, 1)
    if num_vars > np.size(A, 0):
        # More columns than rows: the columns cannot be orthonormal.
        return False
    if np.isnan(primasum(abs(A))):
        return False
    # Deviation of the Gram matrix A.T @ A from the identity.
    gram_err = abs(matprod(A.T, A) - np.eye(num_vars))
    if present(tol):
        return (gram_err <= np.maximum(tol, tol * np.max(abs(A)))).all()
    return (gram_err <= 0).all()
    #====================#
    # Calculation ends   #
    #====================#
def get_arrays_tol(*arrays):
    """
    Get a relative tolerance for a set of arrays. Borrowed from COBYQA.

    Parameters
    ----------
    *arrays: tuple
        Set of `numpy.ndarray` to get the tolerance for.

    Returns
    -------
    float
        Relative tolerance for the set of arrays, scaled by the largest array
        size and the largest finite magnitude found across the arrays.

    Raises
    ------
    ValueError
        If no array is provided.
    """
    if not arrays:
        raise ValueError("At least one array must be provided.")
    largest_size = max(a.size for a in arrays)
    # Largest finite magnitude across all arrays; at least 1 (via `initial`).
    largest_magnitude = max(
        np.max(np.abs(a[np.isfinite(a)]), initial=1.0) for a in arrays
    )
    return 10.0 * EPS * max(largest_size, 1.0) * largest_magnitude

View File

@@ -0,0 +1,290 @@
'''
This module provides some functions that print messages to terminal/files.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
N.B.:
1. In case parallelism is desirable (especially during initialization), the functions may
have to be modified or disabled due to the IO operations.
2. IPRINT indicates the level of verbosity, which increases with the absolute value of IPRINT.
IPRINT = +/-3 can be expensive due to high IO operations.
'''
from .consts import DEBUGGING
from .infos import FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED, \
SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL, DAMAGING_ROUNDING, \
NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT, CALLBACK_TERMINATE
from .present import present
import numpy as np
spaces = ' '
def get_info_string(solver, info):
    '''Map an exit flag to the human-readable return message for SOLVER.'''
    # All flag values in this table are distinct (see infos), so a dict lookup
    # is equivalent to the original if/elif chain.
    reasons = {
        FTARGET_ACHIEVED: 'the target function value is achieved.',
        MAXFUN_REACHED: 'the objective function has been evaluated MAXFUN times.',
        MAXTR_REACHED: 'the maximal number of trust region iterations has been reached.',
        SMALL_TR_RADIUS: 'the trust region radius reaches its lower bound.',
        TRSUBP_FAILED: 'a trust region step has failed to reduce the quadratic model.',
        NAN_INF_X: 'NaN or Inf occurs in x.',
        NAN_INF_F: 'the objective function returns NaN/+Inf.',
        NAN_INF_MODEL: 'NaN or Inf occurs in the models.',
        DAMAGING_ROUNDING: 'rounding errors are becoming damaging.',
        NO_SPACE_BETWEEN_BOUNDS: 'there is no space between the lower and upper bounds of variable.',
        ZERO_LINEAR_CONSTRAINT: 'one of the linear constraints has a zero gradient',
        CALLBACK_TERMINATE: 'the callback function requested termination',
    }
    reason = reasons.get(info, 'UNKNOWN EXIT FLAG')
    return f'Return from {solver} because {reason.strip()}'
def retmsg(solver, info, iprint, nf, f, x, cstrv=None, constr=None):
    '''
    This function prints messages at return.

    solver: solver name, used in the message and in the output file name.
    info: exit flag; must be one of the valid exit codes below.
    iprint: verbosity. 0 prints nothing; > 0 prints to the standard out;
            < 0 appends to the file f'{solver}_output.txt'.
    nf: number of function evaluations.
    f: least objective function value found.
    x: the corresponding point.
    cstrv (optional): constraint violation at X.
    constr (optional): constraint values at X (feasibility is CONSTR >= 0).
    '''
    # Local variables
    valid_exit_codes = [FTARGET_ACHIEVED, MAXFUN_REACHED, MAXTR_REACHED,
        SMALL_TR_RADIUS, TRSUBP_FAILED, NAN_INF_F, NAN_INF_X, NAN_INF_MODEL, DAMAGING_ROUNDING,
        NO_SPACE_BETWEEN_BOUNDS, ZERO_LINEAR_CONSTRAINT, CALLBACK_TERMINATE]
    # Preconditions
    if DEBUGGING:
        assert info in valid_exit_codes
    #====================#
    # Calculation starts #
    #====================#
    if abs(iprint) < 1:  # No printing (iprint == 0)
        return
    elif iprint > 0:  # Print the message to the standard out.
        fname = ''
    else:  # Print the message to a file named FNAME.
        fname = f'{solver}_output.txt'
    # Decide whether the problem is truly constrained.
    if present(constr):
        is_constrained = (np.size(constr) > 0)
    else:
        is_constrained = present(cstrv)
    # Decide the constraint violation.
    if present(cstrv):
        cstrv_loc = cstrv
    elif present(constr):
        cstrv_loc = np.max(np.append(0, -constr))  # N.B.: We assume that the constraint is CONSTR >= 0.
    else:
        cstrv_loc = 0
    # Decide the return message.
    ret_message = get_info_string(solver, info)
    if np.size(x) <= 2:
        x_message = f'\nThe corresponding X is: {x}'  # Printed in one line
    else:
        x_message = f'\nThe corresponding X is:\n{x}'
    if is_constrained:
        nf_message = (f'\nNumber of function values = {nf}{spaces}'
                      f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}')
    else:
        nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}'
    if is_constrained and present(constr):
        if np.size(constr) <= 2:
            constr_message = f'\nThe constraint value is: {constr}'  # Printed in one line
        else:
            constr_message = f'\nThe constraint value is:\n{constr}'
    else:
        constr_message = ''
    # Print the message.
    if abs(iprint) >= 2:
        message = f'\n{ret_message}{nf_message}{x_message}{constr_message}\n'
    else:
        message = f'{ret_message}{nf_message}{x_message}{constr_message}\n'
    if len(fname) > 0:
        # N.B.: the file handle must NOT be named F -- the original code used
        # "as f", shadowing the objective-value parameter within the with-block.
        with open(fname, 'a') as outfile:
            outfile.write(message)
    else:
        print(message)
def rhomsg(solver, iprint, nf, delta, f, rho, x, cstrv=None, constr=None, cpen=None):
    '''
    This function prints messages when RHO is updated.

    solver: solver name, used in the output file name.
    iprint: verbosity. abs(iprint) < 2 prints nothing; > 0 prints to the
            standard out; < 0 appends to the file f'{solver}_output.txt'.
    nf: number of function evaluations.
    delta: current trust-region radius.
    f: least objective function value found.
    rho: new value of RHO.
    x: the corresponding point.
    cstrv (optional): constraint violation at X.
    constr (optional): constraint values at X (feasibility is CONSTR >= 0).
    cpen (optional): penalty parameter, included in the message when given.
    '''
    #====================#
    # Calculation starts #
    #====================#
    if abs(iprint) < 2:  # No printing
        return
    elif iprint > 0:  # Print the message to the standard out.
        fname = ''
    else:  # Print the message to a file named FNAME.
        fname = f'{solver.strip()}_output.txt'
    # Decide whether the problem is truly constrained.
    if present(constr):
        is_constrained = (np.size(constr) > 0)
    else:
        is_constrained = present(cstrv)
    # Decide the constraint violation.
    if present(cstrv):
        cstrv_loc = cstrv
    elif present(constr):
        cstrv_loc = np.max(np.append(0, -constr))  # N.B.: We assume that the constraint is CONSTR >= 0.
    else:
        cstrv_loc = 0
    if present(cpen):
        rho_message = (f'\nNew RHO = {rho}{spaces}Delta = {delta}{spaces}'
                       f'CPEN = {cpen}')
    else:
        rho_message = f'\nNew RHO = {rho}{spaces}Delta = {delta}'
    if np.size(x) <= 2:
        x_message = f'\nThe corresponding X is: {x}'  # Printed in one line
    else:
        x_message = f'\nThe corresponding X is:\n{x}'
    if is_constrained:
        nf_message = (f'\nNumber of function values = {nf}{spaces}'
                      f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}')
    else:
        nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}'
    if is_constrained and present(constr):
        if np.size(constr) <= 2:
            constr_message = f'\nThe constraint value is: {constr}'  # Printed in one line
        else:
            constr_message = f'\nThe constraint value is:\n{constr}'
    else:
        constr_message = ''
    # Print the message.
    if abs(iprint) >= 3:
        message = f'\n{rho_message}{nf_message}{x_message}{constr_message}'
    else:
        message = f'{rho_message}{nf_message}{x_message}{constr_message}'
    if len(fname) > 0:
        # N.B.: the file handle must NOT be named F -- the original code used
        # "as f", shadowing the objective-value parameter within the with-block.
        with open(fname, 'a') as outfile:
            outfile.write(message)
    else:
        print(message)
    #====================#
    # Calculation ends   #
    #====================#
def cpenmsg(solver, iprint, cpen):
    '''
    Print a message when CPEN is updated: to the standard out when iprint > 0,
    or appended to f'{solver}_output.txt' when iprint < 0. Nothing is printed
    when abs(iprint) < 2.
    '''
    #====================#
    # Calculation starts #
    #====================#
    if abs(iprint) < 2:  # No printing
        return
    # Negative IPRINT directs the output to a file instead of the standard out.
    fname = '' if iprint > 0 else f'{solver.strip()}_output.txt'
    # abs(iprint) >= 3 uses a single leading newline; lower verbosity uses two.
    prefix = '\n' if abs(iprint) >= 3 else '\n\n'
    message = f'{prefix}Set CPEN to {cpen}'
    if fname:
        with open(fname, 'a') as handle:
            handle.write(message)
    else:
        print(message)
    #====================#
    # Calculation ends   #
    #====================#
def fmsg(solver, state, iprint, nf, delta, f, x, cstrv=None, constr=None):
    '''
    This subroutine prints messages for each evaluation of the objective function.

    solver: solver name, used in the output file name.
    state: description of the current step (interpolated into the message).
    iprint: verbosity. abs(iprint) < 2 prints nothing; > 0 prints to the
            standard out; < 0 appends to the file f'{solver}_output.txt'.
    nf: number of function evaluations.
    delta: current step radius.
    f: least objective function value found.
    x: the corresponding point.
    cstrv (optional): constraint violation at X.
    constr (optional): constraint values at X (feasibility is CONSTR >= 0).
    '''
    #====================#
    # Calculation starts #
    #====================#
    if abs(iprint) < 2:  # No printing
        return
    elif iprint > 0:  # Print the message to the standard out.
        fname = ''
    else:  # Print the message to a file named FNAME.
        fname = f'{solver.strip()}_output.txt'
    # Decide whether the problem is truly constrained.
    if present(constr):
        is_constrained = (np.size(constr) > 0)
    else:
        is_constrained = present(cstrv)
    # Decide the constraint violation.
    if present(cstrv):
        cstrv_loc = cstrv
    elif present(constr):
        cstrv_loc = np.max(np.append(0, -constr))  # N.B.: We assume that the constraint is CONSTR >= 0.
    else:
        cstrv_loc = 0
    delta_message = f'\n{state} step with radius = {delta}'
    if is_constrained:
        nf_message = (f'\nNumber of function values = {nf}{spaces}'
                      f'Least value of F = {f}{spaces}Constraint violation = {cstrv_loc}')
    else:
        nf_message = f'\nNumber of function values = {nf}{spaces}Least value of F = {f}'
    if np.size(x) <= 2:
        x_message = f'\nThe corresponding X is: {x}'  # Printed in one line
    else:
        x_message = f'\nThe corresponding X is:\n{x}'
    if is_constrained and present(constr):
        if np.size(constr) <= 2:
            constr_message = f'\nThe constraint value is: {constr}'  # Printed in one line
        else:
            constr_message = f'\nThe constraint value is:\n{constr}'
    else:
        constr_message = ''
    # Print the message.
    message = f'{delta_message}{nf_message}{x_message}{constr_message}'
    if len(fname) > 0:
        # N.B.: the file handle must NOT be named F -- the original code used
        # "as f", shadowing the objective-value parameter within the with-block.
        with open(fname, 'a') as outfile:
            outfile.write(message)
    else:
        print(message)
    #====================#
    # Calculation ends   #
    #====================#

View File

@@ -0,0 +1,131 @@
'''
This module provides some Powell-style linear algebra procedures.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
from .linalg import isminor, planerot, matprod, inprod, hypot
from .consts import DEBUGGING, EPS
def qradd_Rdiag(c, Q, Rdiag, n):
    '''
    This function updates the QR factorization of an MxN matrix A of full column rank, attempting to
    add a new column C to this matrix as the LAST column while maintaining the full-rankness.
    Case 1. If C is not in range(A) (theoretically, it implies N < M), then the new matrix is np.hstack([A, C])
    Case 2. If C is in range(A), then the new matrix is np.hstack([A[:, :n-1], C])

    Parameters (as used below; confirm against callers):
    c     -- candidate new column; a vector of length m (it is multiplied by the m-by-m Q).
    Q     -- m-by-m orthogonal factor; modified in place and also returned.
    Rdiag -- diagonal of the R factor; modified in place and also returned.
    n     -- current number of columns of A; the returned value may be n or n+1.

    N.B.:
    0. Instead of R, this subroutine updates Rdiag, which is np.diag(R), with a size at most M and at
    least min(m, n+1). The number is min(m, n+1) rather than min(m, n) as n may be augmented by 1 in
    the function.
    1. With the two cases specified as above, this function does not need A as an input.
    2. The function changes only Q[:, nsave:m] (nsave is the original value of n) and
    R[:, n-1] (n takes the updated value)
    3. Indeed, when C is in range(A), Powell wrote in comments that "set iOUT to the index of the
    constraint (here, column of A --- Zaikun) to be deleted, but branch if no suitable index can be
    found". The idea is to replace a column of A by C so that the new matrix still has full rank
    (such a column must exist unless C = 0). But his code essentially sets iout=n always. Maybe he
    found this worked well enough in practice. Meanwhile, Powell's code includes a snippet that can
    never be reached, which was probably intended to deal with the case that IOUT != n
    '''
    m = Q.shape[1]
    nsave = n  # Original value of n; needed for the debugging assertions only.
    # As in Powell's COBYLA, CQ is set to 0 at the positions with CQ being negligible as per ISMINOR.
    # This may not be the best choice if the subroutine is used in other contexts, e.g. LINCOA.
    cq = matprod(c, Q)       # Coordinates of C in the basis given by the columns of Q.
    cqa = matprod(abs(c), abs(Q))  # Reference magnitudes used to judge negligibility.
    # The line below basically makes an element of cq 0 if adding it to the corresponding element of
    # cqa does not change the latter.
    cq = np.array([0 if isminor(cqi, cqai) else cqi for cqi, cqai in zip(cq, cqa)])
    # Update Q so that the columns of Q[:, n+1:m] are orthogonal to C. This is done by applying a 2D
    # Givens rotation to Q[:, [k, k+1]] from the right to zero C' @ Q[:, k+1] out for K=n+1, ... m-1.
    # Nothing will be done if n >= m-1
    for k in range(m-2, n-1, -1):
        if abs(cq[k+1]) > 0:
            # Powell wrote cq[k+1] != 0 instead of abs. The two differ if cq[k+1] is NaN.
            # If we apply the rotation below when cq[k+1] = 0, then cq[k] will get updated to |cq[k]|.
            G = planerot(cq[k:k+2])
            Q[:, [k, k+1]] = matprod(Q[:, [k, k+1]], G.T)
            # After the rotation, the mass of cq[k:k+2] is concentrated into cq[k].
            cq[k] = hypot(*cq[k:k+2])
    # Augment n by 1 if C is not in range(A)
    if n < m:
        # Powell's condition for the following if: cq[n+1] != 0
        if abs(cq[n]) > EPS**2 and not isminor(cq[n], cqa[n]):
            n += 1
    # Update Rdiag so that Rdiag[n] = cq[n] = np.dot(c, q[:, n]). Note that N may be been augmented.
    if n - 1 >= 0 and n - 1 < m:  # n >= m should not happen unless the input is wrong
        Rdiag[n - 1] = cq[n - 1]
    # Postconditions: n grows by at most 1 and never beyond m; Q stays m-by-m.
    if DEBUGGING:
        assert nsave <= n <= min(nsave + 1, m)
        assert n <= len(Rdiag) <= m
        assert Q.shape == (m, m)
    return Q, Rdiag, n
def qrexc_Rdiag(A, Q, Rdiag, i):  # Used in COBYLA
    '''
    This function updates the QR factorization for an MxN matrix A=Q@R so that the updated Q and
    R form a QR factorization of [A_0, ..., A_{I-1}, A_{I+1}, ..., A_{N-1}, A_I] which is the matrix
    obtained by rearranging columns [I, I+1, ... N-1] of A to [I+1, ..., N-1, I]. Here A is ASSUMED TO
    BE OF FULL COLUMN RANK, Q is a matrix whose columns are orthogonal, and R, which is not present,
    is an upper triangular matrix whose diagonal entries are nonzero. Q and R need not be square.

    Parameters (as used below):
    A     -- m-by-n matrix whose QR factorization is being maintained (read only).
    Q     -- orthogonal factor with m rows and between n and m columns; modified in place.
    Rdiag -- diagonal of the R factor, length n; modified in place.
    i     -- 0-based index of the column to rotate to the last position; 0 <= i < n.

    N.B.:
    0. Instead of R, this function updates Rdiag, which is np.diag(R), the size being n.
    1. With L = Q.shape[1] = R.shape[0], we have M >= L >= N. Most often L = M or N.
    2. This function changes only Q[:, i:] and Rdiag[i:]
    3. (NDB 20230919) In Python, i is either icon or nact - 2, whereas in FORTRAN it is either icon or nact - 1.
    '''
    # Sizes
    m, n = A.shape
    # Preconditions
    assert n >= 1 and n <= m
    assert i >= 0 and i < n
    assert len(Rdiag) == n
    assert Q.shape[0] == m and Q.shape[1] >= n and Q.shape[1] <= m
    # tol = max(1.0E-8, min(1.0E-1, 1.0E8 * EPS * m + 1))
    # assert isorth(Q, tol)  # Costly!
    # Defensive guard: given the asserts above this branch is unreachable unless
    # assertions are disabled (python -O), in which case we bail out unchanged.
    if i < 0 or i >= n:
        return Q, Rdiag
    # Let R be the upper triangular matrix in the QR factorization, namely R = Q.T@A.
    # For each k, find the Givens rotation G with G@(R[k:k+2, :]) = [hypt, 0], and update Q[:, k:k+2]
    # to Q[:, k:k+2]@(G.T). Then R = Q.T@A is an upper triangular matrix as long as A[:, [k, k+1]] is
    # updated to A[:, [k+1, k]]. Indeed, this new upper triangular matrix can be obtained by first
    # updating R[[k, k+1], :] to G@(R[[k, k+1], :]) and then exchanging its columns K and K+1; at the same
    # time, entries k and k+1 of R's diagonal Rdiag become [hypt, -(Rdiag[k+1]/hypt)*RDiag[k]].
    # After this is done for each k = 0, ..., n-2, we obtain the QR factorization of the matrix that
    # rearranges columns [i, i+1, ... n-1] of A as [i+1, ..., n-1, i].
    # Powell's code, however, is slightly different: before everything, he first exchanged columns k and
    # k+1 of Q (as well as rows k and k+1 of R). This makes sure that the entries of the update Rdiag
    # are all positive if it is the case for the original Rdiag.
    for k in range(i, n-1):
        G = planerot([Rdiag[k+1], inprod(Q[:, k], A[:, k+1])])
        # Column swap and rotation applied together, as in Powell's scheme above.
        Q[:, [k, k+1]] = matprod(Q[:, [k+1, k]], (G.T))
        # Powell's code updates Rdiag in the following way:
        # hypt = np.sqrt(Rdiag[k+1]**2 + np.dot(Q[:, k], A[:, k+1])**2)
        # Rdiag[[k, k+1]] = [hypt, (Rdiag[k+1]/hypt)*Rdiag[k]]
        # Note that Rdiag[n-1] inherits all rounding in Rdiag[i:n-1] and Q[:, i:n-1] and hence contains
        # significant errors. Thus we may modify Powell's code to set only Rdiag[k] = hypt here and then
        # calculate Rdiag[n] by an inner product after the loop. Nevertheless, we simple calculate RDiag
        # from scratch below.
    # Calculate Rdiag(i:n) from scratch
    Rdiag[i:n-1] = [inprod(Q[:, k], A[:, k+1]) for k in range(i, n-1)]
    Rdiag[n-1] = inprod(Q[:, n-1], A[:, i])
    return Q, Rdiag

View File

@@ -0,0 +1,277 @@
'''
This is a module that preprocesses the inputs.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from .consts import DEBUGGING, EPS, IPRINT_DEFAULT, FTARGET_DEFAULT, \
MIN_MAXFILT, MAXFILT_DEFAULT, MAXHISTMEM, ETA1_DEFAULT, ETA2_DEFAULT, \
GAMMA1_DEFAULT, GAMMA2_DEFAULT, RHOBEG_DEFAULT, RHOEND_DEFAULT, \
CTOL_DEFAULT, CWEIGHT_DEFAULT
from .present import present
from warnings import warn
import numpy as np
def preproc(solver, num_vars, iprint, maxfun, maxhist, ftarget, rhobeg, rhoend,
            num_constraints=None, npt=None, maxfilt=None, ctol=None, cweight=None,
            eta1=None, eta2=None, gamma1=None, gamma2=None, is_constrained=None, has_rhobeg=None,
            honour_x0=None, xl=None, xu=None, x0=None):
    '''
    This subroutine preprocesses the inputs. It does nothing to the inputs that are valid.

    Each invalid or missing input is replaced by a solver-appropriate default, and a
    warning is issued whenever a user-supplied value is revised. The (possibly revised)
    values are returned as the tuple
    (iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, npt, maxfilt, ctol, cweight,
    eta1, eta2, gamma1, gamma2, x0).
    '''
    # Preconditions
    if DEBUGGING:
        assert num_vars >= 1
        if present(num_constraints):
            assert num_constraints >= 0
            assert num_constraints == 0 or solver.lower() == 'cobyla'
        if solver.lower() == 'cobyla' and present(num_constraints) and present(is_constrained):
            assert num_constraints == 0 or is_constrained
        if solver.lower() == 'bobyqa':
            assert present(xl) and present(xu)
            assert all(xu - xl >= 2 * EPS)
        if present(honour_x0):
            assert solver.lower() == 'bobyqa' and present(has_rhobeg) and present(xl) and present(xu) and present(x0)
    #====================#
    # Calculation starts #
    #====================#
    # Read num_constraints, if necessary
    num_constraints = num_constraints if (present(num_constraints) and solver.lower() == 'cobyla') else 0
    # Decide whether the problem is truly constrained
    is_constrained = is_constrained if (present(is_constrained)) else num_constraints > 0
    # Validate IPRINT
    if np.abs(iprint) > 3:
        iprint = IPRINT_DEFAULT
        warn(f'{solver}: Invalid IPRINT; it should be 0, 1, -1, 2, -2, 3, or -3; it is set to {iprint}')
    # Validate MAXFUN
    if solver.lower() == 'uobyqa':
        # Integer division: (N+1)(N+2) is always even, so no precision is lost, and a
        # revised MAXFUN stays an integer instead of becoming a float.
        min_maxfun = (num_vars + 1) * (num_vars + 2) // 2 + 1
        min_maxfun_str = '(N+1)(N+2)/2 + 1'
    elif solver.lower() == 'cobyla':
        min_maxfun = num_vars + 2
        min_maxfun_str = 'num_vars + 2'
    else:  # CASE ('NEWUOA', 'BOBYQA', 'LINCOA')
        min_maxfun = num_vars + 3
        min_maxfun_str = 'num_vars + 3'
    if maxfun < min_maxfun:
        maxfun = min_maxfun
        warn(f'{solver}: Invalid MAXFUN; it should be at least {min_maxfun_str}; it is set to {maxfun}')
    # Validate MAXHIST
    if maxhist < 0:
        maxhist = maxfun
        warn(f'{solver}: Invalid MAXHIST; it should be a nonnegative integer; it is set to {maxhist}')
    maxhist = min(maxhist, maxfun)  # MAXHIST > MAXFUN is never needed.
    # Validate FTARGET
    if np.isnan(ftarget):
        ftarget = FTARGET_DEFAULT
        warn(f'{solver}: Invalid FTARGET; it should be a real number; it is set to {ftarget}')
    # Validate NPT
    if (solver.lower() == 'newuoa' or solver.lower() == 'bobyqa' or solver.lower() == 'lincoa') and present(npt):
        if (npt < num_vars + 2 or npt > min(maxfun - 1, ((num_vars + 2) * (num_vars + 1)) / 2)):
            npt = int(min(maxfun - 1, 2 * num_vars + 1))
            warn(f'{solver}: Invalid NPT; it should be an integer in the interval [N+2, (N+1)(N+2)/2] and less than MAXFUN; it is set to {npt}')
    # Validate MAXFILT
    if present(maxfilt) and (solver.lower() == 'lincoa' or solver.lower() == 'cobyla'):
        maxfilt_in = maxfilt
        if maxfilt < 1:
            maxfilt = MAXFILT_DEFAULT  # The inputted MAXFILT is obviously wrong.
        else:
            maxfilt = max(MIN_MAXFILT, maxfilt)  # The inputted MAXFILT is too small.
        # Further revise MAXFILT according to MAXHISTMEM.
        if solver.lower() == 'lincoa':
            unit_memo = (num_vars + 2) * np.dtype(float).itemsize
        elif solver.lower() == 'cobyla':
            unit_memo = (num_constraints + num_vars + 2) * np.dtype(float).itemsize
        else:
            unit_memo = 1
        # We cannot simply set MAXFILT = MIN(MAXFILT, MAXHISTMEM/...), as they may not have
        # the same kind, and compilers may complain. We may convert them, but overflow may occur.
        if maxfilt > MAXHISTMEM / unit_memo:
            maxfilt = MAXHISTMEM // unit_memo  # True integer division (exact; no float rounding).
        maxfilt = min(maxfun, max(MIN_MAXFILT, maxfilt))
        if is_constrained:
            if maxfilt_in < 1:
                warn(f'{solver}: Invalid MAXFILT; it should be a positive integer; it is set to {maxfilt}')
            elif maxfilt_in < min(maxfun, MIN_MAXFILT):
                warn(f'{solver}: MAXFILT is too small; it is set to {maxfilt}')
            elif maxfilt < min(maxfilt_in, maxfun):
                warn(f'{solver}: MAXFILT is set to {maxfilt} due to memory limit')
    # Validate ETA1 and ETA2
    eta1_local = eta1 if present(eta1) else ETA1_DEFAULT
    eta2_local = eta2 if present(eta2) else ETA2_DEFAULT
    # When the difference between ETA1 and ETA2 is tiny, we force them to equal.
    # See the explanation around RHOBEG and RHOEND for the reason.
    if present(eta1) and present(eta2):
        if np.abs(eta1 - eta2) < 1.0E2 * EPS * max(np.abs(eta1), 1):
            eta2 = eta1
    if present(eta1):
        if np.isnan(eta1):
            # In this case, we take the value hard coded in Powell's original code
            # without any warning. It is useful when interfacing with MATLAB/Python.
            eta1 = ETA1_DEFAULT
        elif eta1 < 0 or eta1 >= 1:
            # Take ETA2 into account if it has a valid value.
            if present(eta2) and eta2_local > 0 and eta2_local <= 1:
                eta1 = max(EPS, eta2 / 7.0)
            else:
                eta1 = ETA1_DEFAULT
            warn(f'{solver}: Invalid ETA1; it should be in the interval [0, 1) and not more than ETA2; it is set to {eta1}')
    if present(eta2):
        if np.isnan(eta2):
            # In this case, we take the value hard coded in Powell's original code
            # without any warning. It is useful when interfacing with MATLAB/Python.
            eta2 = ETA2_DEFAULT
        elif present(eta1) and (eta2 < eta1_local or eta2 > 1):
            eta2 = (eta1 + 2) / 3.0
            warn(f'{solver}: Invalid ETA2; it should be in the interval [0, 1) and not less than ETA1; it is set to {eta2}')
    # Validate GAMMA1 and GAMMA2
    if present(gamma1):
        if np.isnan(gamma1):
            # In this case, we take the value hard coded in Powell's original code
            # without any warning. It is useful when interfacing with MATLAB/Python.
            gamma1 = GAMMA1_DEFAULT
        elif gamma1 <= 0 or gamma1 >= 1:
            gamma1 = GAMMA1_DEFAULT
            warn(f'{solver}: Invalid GAMMA1; it should in the interval (0, 1); it is set to {gamma1}')
    if present(gamma2):
        if np.isnan(gamma2):
            # In this case, we take the value hard coded in Powell's original code
            # without any warning. It is useful when interfacing with MATLAB/Python.
            gamma2 = GAMMA2_DEFAULT
        elif gamma2 < 1 or np.isinf(gamma2):
            gamma2 = GAMMA2_DEFAULT
            warn(f'{solver}: Invalid GAMMA2; it should be a real number not less than 1; it is set to {gamma2}')
    # Validate RHOBEG and RHOEND
    if np.abs(rhobeg - rhoend) < 1.0e2 * EPS * np.maximum(np.abs(rhobeg), 1):
        # When the data is passed from the interfaces (e.g., MEX) to the Fortran code, RHOBEG, and RHOEND
        # may change a bit. It was observed in a MATLAB test that MEX passed 1 to Fortran as
        # 0.99999999999999978. Therefore, if we set RHOEND = RHOBEG in the interfaces, then it may happen
        # that RHOEND > RHOBEG, which is considered as an invalid input. To avoid this situation, we
        # force RHOBEG and RHOEND to equal when the difference is tiny.
        rhoend = rhobeg
    # Revise the default values for RHOBEG/RHOEND according to the solver.
    if solver.lower() == 'bobyqa':
        # N.B.: np.min(a, b) would interpret b as the *axis* argument, which is wrong here.
        # Use the builtin min for the scalar-vs-scalar comparison.
        rhobeg_default = max(EPS, min(RHOBEG_DEFAULT, np.min(xu - xl) / 4.0))
        rhoend_default = max(EPS, min(0.1 * rhobeg_default, RHOEND_DEFAULT))
    else:
        rhobeg_default = RHOBEG_DEFAULT
        rhoend_default = RHOEND_DEFAULT
    if solver.lower() == 'bobyqa':
        # Do NOT merge the IF below into the ELIF above! Otherwise, XU and XL may be accessed even if
        # the solver is not BOBYQA, because the logical evaluation is not short-circuit.
        if rhobeg > np.min(xu - xl) / 2:
            # Do NOT make this revision if RHOBEG not positive or not finite, because otherwise RHOBEG
            # will get a huge value when XU or XL contains huge values that indicate unbounded variables.
            rhobeg = np.min(xu - xl) / 4.0  # Here, we do not take RHOBEG_DEFAULT.
            warn(f'{solver}: Invalid RHOBEG; {solver} requires 0 < RHOBEG <= np.min(XU-XL)/2; it is set to np.min(XU-XL)/4')
    if rhobeg <= 0 or np.isnan(rhobeg) or np.isinf(rhobeg):
        # Take RHOEND into account if it has a valid value. We do not do this if the solver is BOBYQA,
        # which requires that RHOBEG <= (XU-XL)/2.
        if np.isfinite(rhoend) and rhoend > 0 and solver.lower() != 'bobyqa':
            rhobeg = max(10 * rhoend, rhobeg_default)
        else:
            rhobeg = rhobeg_default
        warn(f'{solver}: Invalid RHOBEG; it should be a positive number; it is set to {rhobeg}')
    if rhoend <= 0 or rhobeg < rhoend or np.isnan(rhoend) or np.isinf(rhoend):
        rhoend = max(EPS, min(0.1 * rhobeg, rhoend_default))
        warn(f'{solver}: Invalid RHOEND; it should be a positive number and RHOEND <= RHOBEG; it is set to {rhoend}')
    # For BOBYQA, revise X0 or RHOBEG so that the distance between X0 and the inactive bounds is at
    # least RHOBEG. If HONOUR_X0 == TRUE, revise RHOBEG if needed; otherwise, revise HONOUR_X0 if needed.
    if present(honour_x0):
        if honour_x0:
            rhobeg_old = rhobeg
            lbx = np.isfinite(xl) & (x0 - xl <= EPS * np.maximum(1, np.abs(xl)))  # X0 essentially equals XL
            ubx = np.isfinite(xu) & (x0 - xu >= -EPS * np.maximum(1, np.abs(xu)))  # X0 essentially equals XU
            x0[lbx] = xl[lbx]
            x0[ubx] = xu[ubx]
            # Gather the candidates in one flat array: np.min on a ragged list of a scalar
            # and arrays is invalid. The slices may be empty, but the array always contains
            # at least rhobeg itself.
            rhobeg = max(EPS, np.min(np.concatenate(([rhobeg], x0[~lbx] - xl[~lbx], xu[~ubx] - x0[~ubx]))))
            if rhobeg_old - rhobeg > EPS * max(1, rhobeg_old):
                rhoend = max(EPS, min(0.1 * rhobeg, rhoend))  # We do not revise RHOEND unless RHOBEG is truly revised.
                if has_rhobeg:
                    warn(f'{solver}: RHOBEG is revised to {rhobeg} and RHOEND to at most 0.1*RHOBEG so that the distance between X0 and the inactive bounds is at least RHOBEG')
            else:
                rhoend = np.minimum(rhoend, rhobeg)  # This may update RHOEND slightly.
        else:
            x0_old = x0  # Recorded to see whether X0 is really revised.
            # N.B.: The following revision is valid only if XL <= X0 <= XU and RHOBEG <= MINVAL(XU-XL)/2,
            # which should hold at this point due to the revision of RHOBEG and moderation of X0.
            # The cases below are mutually exclusive in precise arithmetic as MINVAL(XU-XL) >= 2*RHOBEG.
            lbx = x0 <= xl + 0.5 * rhobeg
            lbx_plus = (x0 > xl + 0.5 * rhobeg) & (x0 < xl + rhobeg)
            ubx = x0 >= xu - 0.5 * rhobeg
            ubx_minus = (x0 < xu - 0.5 * rhobeg) & (x0 > xu - rhobeg)
            x0[lbx] = xl[lbx]
            x0[lbx_plus] = xl[lbx_plus] + rhobeg
            x0[ubx] = xu[ubx]
            x0[ubx_minus] = xu[ubx_minus] - rhobeg
            if (any(np.abs(x0_old - x0) > 0)):
                warn(f'{solver}: X0 is revised so that the distance between X0 and the inactive bounds is at least RHOBEG set HONOUR_X0 to .TRUE. if you prefer to keep X0 unchanged')
    # Validate CTOL (it can be 0)
    if (present(ctol)):
        if (np.isnan(ctol) or ctol < 0):
            ctol = CTOL_DEFAULT
            if (is_constrained):
                warn(f'{solver}: Invalid CTOL; it should be a nonnegative number; it is set to {ctol}')
    # Validate CWEIGHT (it can be +Inf)
    if (present(cweight)):
        if (np.isnan(cweight) or cweight < 0):
            cweight = CWEIGHT_DEFAULT
            if (is_constrained):
                warn(f'{solver}: Invalid CWEIGHT; it should be a nonnegative number; it is set to {cweight}')
    #====================#
    # Calculation ends   #
    #====================#
    # Postconditions
    if DEBUGGING:
        assert abs(iprint) <= 3
        assert maxhist >= 0 and maxhist <= maxfun
        if present(npt):
            assert maxfun >= npt + 1
            assert npt >= 3
        if present(maxfilt):
            assert maxfilt >= np.minimum(MIN_MAXFILT, maxfun) and maxfilt <= maxfun
        if present(eta1) and present(eta2):
            assert eta1 >= 0 and eta1 <= eta2 and eta2 < 1
        if present(gamma1) and present(gamma2):
            assert gamma1 > 0 and gamma1 < 1 and gamma2 > 1
        assert rhobeg >= rhoend and rhoend > 0
        if solver.lower() == 'bobyqa':
            assert all(rhobeg <= (xu - xl) / 2)
            assert all(np.isfinite(x0))
            # Elementwise &/| are required here: Python's 'and'/'or' on NumPy arrays
            # raises "truth value of an array is ambiguous".
            assert all((x0 >= xl) & ((x0 <= xl) | (x0 >= xl + rhobeg)))
            assert all((x0 <= xu) & ((x0 >= xu) | (x0 <= xu - rhobeg)))
        if present(ctol):
            assert ctol >= 0
    return iprint, maxfun, maxhist, ftarget, rhobeg, rhoend, npt, maxfilt, ctol, cweight, eta1, eta2, gamma1, gamma2, x0

View File

@@ -0,0 +1,5 @@
def present(x):
    '''
    Python analogue of Fortran's PRESENT intrinsic for optional arguments:
    report whether the optional argument x was actually supplied, i.e. is not None.
    '''
    missing = x is None
    return not missing

View File

@@ -0,0 +1,54 @@
'''
This module calculates the reduction ratio for trust-region methods.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from .consts import DEBUGGING, REALMAX
import numpy as np
def redrat(ared, pred, rshrink):
    '''
    Evaluate the trust-region reduction ratio ARED/PRED, handling NaN and
    infinities so that the solver can keep progressing instead of terminating.
    '''
    # Preconditions
    if DEBUGGING:
        assert rshrink >= 0
    #====================#
    # Calculation starts #
    #====================#
    if np.isnan(ared):
        # Should not occur in unconstrained problems thanks to the moderated extreme barrier.
        ratio = -REALMAX
    elif np.isnan(pred) or pred <= 0:
        # The trust-region subproblem solver failed (rare). Rather than stopping as Powell's
        # original code does, pick a ratio that lets the solver continue:
        # - a positive actual reduction still accepts the trial point (radius shrinks if rshrink > 0);
        # - otherwise flag a bad step with a huge negative ratio so the solver considers a
        #   geometry step or a rho reduction.
        ratio = rshrink / 2 if ared > 0 else -REALMAX
    elif np.isposinf(pred):
        # Direct division would give NaN when ared is also infinite; resolve those cases by hand.
        if np.isposinf(ared):
            ratio = 1
        elif np.isneginf(ared):
            ratio = -REALMAX
        else:
            ratio = ared / pred
    else:
        ratio = ared / pred
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert not np.isnan(ratio)
    return ratio

View File

@@ -0,0 +1,47 @@
'''
This module provides a function that calculates RHO when it needs to be reduced.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
from .consts import DEBUGGING
import numpy as np
def redrho(rho_in, rhoend):
    '''
    Compute the reduced trust-region parameter RHO, shared by UOBYQA, NEWUOA,
    BOBYQA, and LINCOA. For COBYLA, Powell's code reduces RHO by
    'RHO *= 0.5; if RHO <= 1.5 * RHOEND: RHO = RHOEND' as specified in (11) of
    the COBYLA paper; the scheme below seems to work better, especially after
    the introduction of DELTA.
    '''
    # Preconditions
    if DEBUGGING:
        assert rho_in > rhoend > 0
    #====================#
    # Calculation starts #
    #====================#
    ratio = rho_in / rhoend
    if ratio <= 16:
        # RHO is already close to RHOEND: land exactly on RHOEND.
        rho = rhoend
    elif ratio > 250:
        # RHO is far above RHOEND: simply shrink by a factor of ten.
        rho = 0.1 * rho_in
    else:
        # Intermediate regime: geometric mean of RHO_IN and RHOEND.
        rho = np.sqrt(ratio) * rhoend
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert rho_in > rho >= rhoend
    return rho

View File

@@ -0,0 +1,296 @@
'''
This module provides subroutines that ensure the returned X is optimal among all the calculated
points in the sense that no other point achieves both lower function value and lower constraint
violation at the same time. This module is needed only in the constrained case.
Translated from Zaikun Zhang's modern-Fortran reference implementation in PRIMA.
Dedicated to late Professor M. J. D. Powell FRS (1936--2015).
Python translation by Nickolai Belakovski.
'''
import numpy as np
import numpy.typing as npt
from .consts import DEBUGGING, EPS, CONSTRMAX, REALMAX, FUNCMAX
from .present import present
def isbetter(f1: float, c1: float, f2: float, c2: float, ctol: float) -> bool:
    '''
    This function compares whether FC1 = (F1, C1) is (strictly) better than FC2 = (F2, C2), which
    basically means that (F1 < F2 and C1 <= C2) or (F1 <= F2 and C1 < C2).
    It takes care of the cases where some of these values are NaN or Inf, even though some cases
    should never happen due to the moderated extreme barrier.
    At return, BETTER = TRUE if and only if (F1, C1) is better than (F2, C2).
    Here, C means constraint violation, which is a nonnegative number.
    '''
    # Preconditions
    if DEBUGGING:
        # BUGFIX: the first assert previously tested np.isposinf([f2, c2]) — a copy-paste
        # slip; it must test the (f1, c1) pair, mirroring the second assert for (f2, c2).
        assert not any(np.isnan([f1, c1]) | np.isposinf([f1, c1]))
        assert not any(np.isnan([f2, c2]) | np.isposinf([f2, c2]))
        assert c1 >= 0 and c2 >= 0
        assert ctol >= 0
    #====================#
    # Calculation starts #
    #====================#
    is_better = False
    # Even though NaN/+Inf should not occur in FC1 or FC2 due to the moderated extreme barrier, for
    # security and robustness, the code below does not make this assumption.
    is_better = is_better or (any(np.isnan([f1, c1]) | np.isposinf([f1, c1])) and not any(np.isnan([f2, c2]) | np.isposinf([f2, c2])))
    is_better = is_better or (f1 < f2 and c1 <= c2)
    is_better = is_better or (f1 <= f2 and c1 < c2)
    # If C1 <= CTOL and C2 is significantly larger/worse than CTOL, i.e., C2 > MAX(CTOL,CREF),
    # then FC1 is better than FC2 as long as F1 < REALMAX. Normally CREF >= CTOL so MAX(CTOL, CREF)
    # is indeed CREF. However, this may not be true if CTOL > 1E-1*CONSTRMAX.
    cref = 10 * max(EPS, min(ctol, 1.0E-2 * CONSTRMAX))  # The MIN avoids overflow.
    is_better = is_better or (f1 < REALMAX and c1 <= ctol and (c2 > max(ctol, cref) or np.isnan(c2)))
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        assert not (is_better and f1 >= f2 and c1 >= c2)
        assert is_better or not (f1 <= f2 and c1 < c2)
        assert is_better or not (f1 < f2 and c1 <= c2)
    return is_better
def savefilt(cstrv, ctol, cweight, f, x, nfilt, cfilt, ffilt, xfilt, constr=None, confilt=None):
    '''
    This subroutine saves X, F, and CSTRV in XFILT, FFILT, and CFILT (and CONSTR in CONFILT
    if they are present), unless a vector in XFILT[:, :NFILT] is better than X.
    If X is better than some vectors in XFILT[:, :NFILT] then these vectors will be
    removed. If X is not better than any of XFILT[:, :NFILT], but NFILT == MAXFILT,
    then we remove a column from XFILT according to the merit function
    PHI = FFILT + CWEIGHT * max(CFILT - CTOL, 0)
    N.B.:
    1. Only XFILT[:, :NFILT] and FFILT[:NFILT] etc contain valid information,
    while XFILT[:, NFILT+1:MAXFILT] and FFILT[NFILT+1:MAXFILT] etc are not
    initialized yet.
    2. We decide whether an X is better than another by the ISBETTER function
    '''
    # Sizes
    if present(constr):
        num_constraints = len(constr)
    else:
        num_constraints = 0
    num_vars = len(x)
    maxfilt = len(ffilt)
    # Preconditions
    if DEBUGGING:
        # Check the size of X.
        assert num_vars >= 1
        # Check CWEIGHT and CTOL
        assert cweight >= 0
        assert ctol >= 0
        # Check NFILT
        assert nfilt >= 0 and nfilt <= maxfilt
        # Check the sizes of XFILT, FFILT, CFILT.
        assert maxfilt >= 1
        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
        assert np.size(cfilt) == maxfilt
        # Check the values of XFILT, FFILT, CFILT.
        assert not (np.isnan(xfilt[:, :nfilt])).any()
        assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
        # BUGFIX: the comparison must be parenthesized; '|' binds tighter than '<', so the
        # original 'cfilt < 0 | np.isnan(cfilt)' compared cfilt against (0 | isnan(cfilt)).
        assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
        # Check the values of X, F, CSTRV.
        # X does not contain NaN if X0 does not and the trust-region/geometry steps are proper.
        assert not any(np.isnan(x))
        # F cannot be NaN/+Inf due to the moderated extreme barrier.
        assert not (np.isnan(f) | np.isposinf(f))
        # CSTRV cannot be NaN/+Inf due to the moderated extreme barrier.
        # BUGFIX: parenthesized for the same precedence reason as above.
        assert not ((cstrv < 0) | np.isnan(cstrv) | np.isposinf(cstrv))
        # Check CONSTR and CONFILT.
        assert present(constr) == present(confilt)
        if present(constr):
            # CONSTR cannot contain NaN/-Inf due to the moderated extreme barrier.
            assert not any(np.isnan(constr) | np.isneginf(constr))
            assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
            assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
    #====================#
    # Calculation starts #
    #====================#
    # Return immediately if any column of XFILT is better than X.
    if any((isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt]))) or \
            any(np.logical_and(ffilt[:nfilt] <= f, cfilt[:nfilt] <= cstrv)):
        return nfilt, cfilt, ffilt, xfilt, confilt
    # Decide which columns of XFILT to keep.
    keep = np.logical_not([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
    # If NFILT == MAXFILT and X is not better than any column of XFILT, then we remove the worst column
    # of XFILT according to the merit function PHI = FFILT + CWEIGHT * MAX(CFILT - CTOL, ZERO).
    if sum(keep) == maxfilt:  # In this case, NFILT = SIZE(KEEP) = COUNT(KEEP) = MAXFILT > 0.
        cfilt_shifted = np.maximum(cfilt - ctol, 0)
        if cweight <= 0:
            phi = ffilt
        elif np.isposinf(cweight):
            phi = cfilt_shifted
            # We should not use CFILT here; if MAX(CFILT_SHIFTED) is attained at multiple indices, then
            # we will check FFILT to exhaust the remaining degree of freedom.
        else:
            phi = np.maximum(ffilt, -REALMAX)
            phi = np.nan_to_num(phi, nan=-REALMAX)  # Replace NaN with -REALMAX and +/- inf with large numbers
            phi += cweight * cfilt_shifted
        # We select X to maximize PHI. In case there are multiple maximizers, we take the one with the
        # largest CSTRV_SHIFTED; if there are more than one choices, we take the one with the largest F;
        # if there are several candidates, we take the one with the largest CSTRV; if the last comparison
        # still leads to more than one possibilities, then they are equally bad and we choose the first.
        # N.B.:
        # 1. This process is the opposite of selecting KOPT in SELECTX.
        # 2. In finite-precision arithmetic, PHI_1 == PHI_2 and CSTRV_SHIFTED_1 == CSTRV_SHIFTED_2 do
        # not ensure that F_1 == F_2!
        phimax = max(phi)
        cref = max(cfilt_shifted[phi >= phimax])
        fref = max(ffilt[cfilt_shifted >= cref])
        kworst = np.ma.array(cfilt, mask=(ffilt > fref)).argmax()
        if kworst < 0 or kworst >= len(keep):  # For security. Should not happen.
            kworst = 0
        keep[kworst] = False
    # Keep the good xfilt values and remove all the ones that are strictly worse than the new x.
    nfilt = sum(keep)
    index_to_keep = np.where(keep)[0]
    xfilt[:, :nfilt] = xfilt[:, index_to_keep]
    ffilt[:nfilt] = ffilt[index_to_keep]
    cfilt[:nfilt] = cfilt[index_to_keep]
    if confilt is not None and constr is not None:
        confilt[:, :nfilt] = confilt[:, index_to_keep]
    # Once we have removed all the vectors that are strictly worse than x,
    # we add x to the filter.
    xfilt[:, nfilt] = x
    ffilt[nfilt] = f
    cfilt[nfilt] = cstrv
    if confilt is not None and constr is not None:
        confilt[:, nfilt] = constr
    nfilt += 1  # In Python we need to increment the index afterwards
    #==================#
    # Calculation ends #
    #==================#
    # Postconditions
    if DEBUGGING:
        # Check NFILT and the sizes of XFILT, FFILT, CFILT.
        assert nfilt >= 1 and nfilt <= maxfilt
        assert np.size(xfilt, 0) == num_vars and np.size(xfilt, 1) == maxfilt
        assert np.size(ffilt) == maxfilt
        assert np.size(cfilt) == maxfilt
        # Check the values of XFILT, FFILT, CFILT.
        assert not (np.isnan(xfilt[:, :nfilt])).any()
        assert not any(np.isnan(ffilt[:nfilt]) | np.isposinf(ffilt[:nfilt]))
        # BUGFIX: parenthesized for the same precedence reason as in the preconditions.
        assert not any((cfilt[:nfilt] < 0) | np.isnan(cfilt[:nfilt]) | np.isposinf(cfilt[:nfilt]))
        # Check that no point in the filter is better than X, and X is better than no point.
        assert not any([isbetter(ffilt_i, cfilt_i, f, cstrv, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
        assert not any([isbetter(f, cstrv, ffilt_i, cfilt_i, ctol) for ffilt_i, cfilt_i in zip(ffilt[:nfilt], cfilt[:nfilt])])
        # Check CONFILT.
        if present(confilt):
            assert np.size(confilt, 0) == num_constraints and np.size(confilt, 1) == maxfilt
            assert not (np.isnan(confilt[:, :nfilt]) | np.isneginf(confilt[:, :nfilt])).any()
    return nfilt, cfilt, ffilt, xfilt, confilt
def selectx(fhist: npt.NDArray, chist: npt.NDArray, cweight: float, ctol: float):
    '''
    This subroutine selects X according to FHIST and CHIST, which represents (a part of) history
    of F and CSTRV. Normally, FHIST and CHIST are not the full history but only a filter, e.g. ffilt
    and CFILT generated by SAVEFILT. However, we name them as FHIST and CHIST because the [F, CSTRV]
    in a filter should not dominate each other, but this subroutine does NOT assume such a property.

    Parameters
    ----------
    fhist : objective-function values of the recorded points.
    chist : constraint violations of the recorded points (same length as fhist).
    cweight : weight of the (shifted) constraint violation in the merit function;
        0 means "ignore constraints", +inf means "feasibility first".
    ctol : tolerance of the constraint violation (CSTRV). A point is considered feasible if
        its constraint violation is at most CTOL. Note that CTOL is absolute, not relative.

    Returns
    -------
    kopt : index (0-based) of the selected point in FHIST/CHIST.
    '''
    # Sizes
    nhist = len(fhist)

    # Preconditions
    if DEBUGGING:
        assert nhist >= 1
        assert np.size(chist) == nhist
        assert not any(np.isnan(fhist) | np.isposinf(fhist))
        # N.B.: the parentheses around (chist < 0) are essential: bitwise | binds tighter than <,
        # so without them the expression would compare chist against (0 | isnan | isposinf) and
        # silently fail to detect NaN/Inf entries.
        assert not any((chist < 0) | np.isnan(chist) | np.isposinf(chist))
        assert cweight >= 0
        assert ctol >= 0

    #====================#
    # Calculation starts #
    #====================#

    # We select X among the points with F < FREF and CSTRV < CREF.
    # Do NOT use F <= FREF, because F == FREF (FUNCMAX or REALMAX) may mean F == INF in practice!
    if any(np.logical_and(fhist < FUNCMAX, chist < CONSTRMAX)):
        fref = FUNCMAX
        cref = CONSTRMAX
    elif any(np.logical_and(fhist < REALMAX, chist < CONSTRMAX)):
        fref = REALMAX
        cref = CONSTRMAX
    elif any(np.logical_and(fhist < FUNCMAX, chist < REALMAX)):
        fref = FUNCMAX
        cref = REALMAX
    else:
        fref = REALMAX
        cref = REALMAX

    if not any(np.logical_and(fhist < fref, chist < cref)):
        # No point is acceptable; fall back to the most recent point.
        kopt = nhist - 1
    else:
        # Shift the constraint violations by ctol, so that cstrv <= ctol is regarded as no violation.
        chist_shifted = np.maximum(chist - ctol, 0)
        # cmin is the minimal shifted constraint violation attained in the history.
        cmin = np.min(chist_shifted[fhist < fref])
        # We consider only the points whose shifted constraint violations are at most the cref below.
        # N.B.: Without taking np.maximum(EPS, .), cref would be 0 if cmin = 0. In that case, asking for
        # cstrv_shift < cref would be WRONG!
        cref = np.maximum(EPS, 2*cmin)
        # We use the following phi as our merit function to select X.
        if cweight <= 0:
            phi = fhist
        elif np.isposinf(cweight):
            phi = chist_shifted
            # We should not use chist here; if np.minimum(chist_shifted) is attained at multiple indices, then
            # we will check fhist to exhaust the remaining degree of freedom.
        else:
            phi = np.maximum(fhist, -REALMAX) + cweight * chist_shifted
            # np.maximum(fhist, -REALMAX) makes sure that phi will not contain NaN (unless there is a bug).
        # We select X to minimize phi subject to f < fref and cstrv_shift <= cref (see the comments
        # above for the reason of taking "<" and "<=" in these two constraints). In case there are
        # multiple minimizers, we take the one with the least cstrv_shift; if there is more than one
        # choice, we take the one with the least f; if there are several candidates, we take the one
        # with the least cstrv; if the last comparison still leads to more than one possibility, then
        # they are equally good and we choose the first.
        # N.B.:
        # 1. This process is the opposite of selecting kworst in savefilt.
        # 2. In finite-precision arithmetic, phi_1 == phi_2 and cstrv_shift_1 == cstrv_shift_2 do
        # not ensure that f_1 == f_2!
        phimin = np.min(phi[np.logical_and(fhist < fref, chist_shifted <= cref)])
        cref = np.min(chist_shifted[np.logical_and(fhist < fref, phi <= phimin)])
        fref = np.min(fhist[chist_shifted <= cref])
        # Can't use argmin here because using it with a mask throws off the index
        kopt = np.ma.array(chist, mask=(fhist > fref)).argmin()

    #==================#
    # Calculation ends #
    #==================#

    # Postconditions
    if DEBUGGING:
        assert kopt >= 0 and kopt < nhist
        assert not any([isbetter(fhisti, chisti, fhist[kopt], chist[kopt], ctol) for fhisti, chisti in zip(fhist[:nhist], chist[:nhist])])
    return kopt