Source code for torchmin.lstsq.least_squares

"""
Generic interface for nonlinear least-squares minimization.
"""
from warnings import warn
import numbers
import torch

from .trf import trf
from .common import EPS, in_bounds, make_strictly_feasible

__all__ = ['least_squares']


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


def prepare_bounds(bounds, x0):
    n = x0.shape[0]
    def process(b):
        if isinstance(b, numbers.Number):
            return x0.new_full((n,), b)
        elif isinstance(b, torch.Tensor):
            if b.dim() == 0:
                return x0.new_full((n,), b)
            return b
        else:
            raise ValueError

    lb, ub = [process(b) for b in bounds]

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale
    try:
        x_scale = torch.as_tensor(x_scale)
        valid = x_scale.isfinite().all() and x_scale.gt(0).all()
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.dim() == 0:
        x_scale = x0.new_full(x0.shape, x_scale)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


[docs]def least_squares( fun, x0, bounds=None, method='trf', ftol=1e-8, xtol=1e-8, gtol=1e-8, x_scale=1.0, tr_solver='lsmr', tr_options=None, max_nfev=None, verbose=0): r"""Solve a nonlinear least-squares problem with bounds on the variables. Given the residual function :math:`f: \mathcal{R}^n \rightarrow \mathcal{R}^m`, `least_squares` finds a local minimum of the residual sum-of-squares (RSS) objective: .. math:: x^* = \underset{x}{\operatorname{arg\,min\,}} \frac{1}{2} ||f(x)||_2^2 \quad \text{subject to} \quad lb \leq x \leq ub The solution is found using variants of the Gauss-Newton method, a modification of Newton's method tailored to RSS problems. Parameters ---------- fun : callable Function which computes the vector of residuals, with the signature ``fun(x)``. The argument ``x`` passed to this function is a Tensor of shape (n,) (never a scalar, even for n=1). It must allocate and return a 1-D Tensor of shape (m,) or a scalar. x0 : Tensor or float Initial guess on independent variables, with shape (n,). If float, it will be treated as a 1-D Tensor with one element. bounds : 2-tuple of Tensor, optional Lower and upper bounds on independent variables. Defaults to no bounds. Each Tensor must match the size of `x0` or be a scalar, in the latter case a bound will be the same for all variables. Use ``inf`` with an appropriate sign to disable bounds on all or some variables. method : str, optional Algorithm to perform minimization. Default is 'trf'. * 'trf' : Trust Region Reflective algorithm, particularly suitable for large sparse problems with bounds. Generally robust method. * 'dogbox' : COMING SOON. dogleg algorithm with rectangular trust regions, typical use case is small problems with bounds. Not recommended for problems with rank-deficient Jacobian. ftol : float or None, optional Tolerance for termination by the change of the cost function. The optimization process is stopped when ``dF < ftol * F``, and there was an adequate agreement between a local quadratic model and the true model in the last step. If None, the termination by this condition is disabled. Default is 1e-8. xtol : float or None, optional Tolerance for termination by the change of the independent variables. Termination occurs when ``norm(dx) < xtol * (xtol + norm(x))``. If None, the termination by this condition is disabled. Default is 1e-8. gtol : float or None, optional Tolerance for termination by the norm of the gradient. Default is 1e-8. The exact condition depends on `method` used: * For 'trf' : ``norm(g_scaled, ord=inf) < gtol``, where ``g_scaled`` is the value of the gradient scaled to account for the presence of the bounds [STIR]_. * For 'dogbox' : ``norm(g_free, ord=inf) < gtol``, where ``g_free`` is the gradient with respect to the variables which are not in the optimal state on the boundary. x_scale : Tensor or 'jac', optional Characteristic scale of each variable. Setting `x_scale` is equivalent to reformulating the problem in scaled variables ``xs = x / x_scale``. An alternative view is that the size of a trust region along jth dimension is proportional to ``x_scale[j]``. Improved convergence may be achieved by setting `x_scale` such that a step of a given size along any of the scaled variables has a similar effect on the cost function. If set to 'jac', the scale is iteratively updated using the inverse norms of the columns of the Jacobian matrix (as described in [JJMore]_). max_nfev : None or int, optional Maximum number of function evaluations before the termination. Defaults to 100 * n. tr_solver : str, optional Method for solving trust-region subproblems. * 'exact' is suitable for not very large problems with dense Jacobian matrices. The computational complexity per iteration is comparable to a singular value decomposition of the Jacobian matrix. * 'lsmr' is suitable for problems with sparse and large Jacobian matrices. It uses an iterative procedure for finding a solution of a linear least-squares problem and only requires matrix-vector product evaluations. tr_options : dict, optional Keyword options passed to trust-region solver. * ``tr_solver='exact'``: `tr_options` are ignored. * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`. Additionally, ``method='trf'`` supports 'regularize' option (bool, default is True), which adds a regularization term to the normal equation, which improves convergence if the Jacobian is rank-deficient [Byrd]_ (eq. 3.4). verbose : int, optional Level of algorithm's verbosity. * 0 : work silently (default). * 1 : display a termination report. * 2 : display progress during iterations. Returns ------- result : OptimizeResult Result of the optimization routine. References ---------- .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior, and Conjugate Gradient Method for Large-Scale Bound-Constrained Minimization Problems," SIAM Journal on Scientific Computing, Vol. 21, Number 1, pp 1-23, 1999. .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate solution of the trust region problem by minimization over two-dimensional subspaces", Math. Programming, 40, pp. 247-263, 1988. .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation and Theory," Numerical Analysis, ed. G. A. Watson, Lecture Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977. """ if tr_options is None: tr_options = {} if method not in ['trf', 'dogbox']: raise ValueError("`method` must be 'trf' or 'dogbox'.") if tr_solver not in ['exact', 'lsmr', 'cgls']: raise ValueError("`tr_solver` must be one of {'exact', 'lsmr', 'cgls'}.") if verbose not in [0, 1, 2]: raise ValueError("`verbose` must be in [0, 1, 2].") if bounds is None: bounds = (-float('inf'), float('inf')) elif not (isinstance(bounds, (tuple, list)) and len(bounds) == 2): raise ValueError("`bounds` must be a tuple/list with 2 elements.") if max_nfev is not None and max_nfev <= 0: raise ValueError("`max_nfev` must be None or positive integer.") # initial point x0 = torch.atleast_1d(x0) if torch.is_complex(x0): raise ValueError("`x0` must be real.") elif x0.dim() > 1: raise ValueError("`x0` must have at most 1 dimension.") # bounds lb, ub = prepare_bounds(bounds, x0) if lb.shape != x0.shape or ub.shape != x0.shape: raise ValueError("Inconsistent shapes between bounds and `x0`.") elif torch.any(lb >= ub): raise ValueError("Each lower bound must be strictly less than each " "upper bound.") elif not in_bounds(x0, lb, ub): raise ValueError("`x0` is infeasible.") # x_scale x_scale = check_x_scale(x_scale, x0) # tolerance ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method) if method == 'trf': x0 = make_strictly_feasible(x0, lb, ub) def fun_wrapped(x): return torch.atleast_1d(fun(x)) # check function f0 = fun_wrapped(x0) if f0.dim() != 1: raise ValueError("`fun` must return at most 1-d array_like. " "f0.shape: {0}".format(f0.shape)) elif not f0.isfinite().all(): raise ValueError("Residuals are not finite in the initial point.") initial_cost = 0.5 * f0.dot(f0) if isinstance(x_scale, str) and x_scale == 'jac': raise ValueError("x_scale='jac' can't be used when `jac` " "returns LinearOperator.") if method == 'trf': result = trf(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale, tr_solver, tr_options.copy(), verbose) elif method == 'dogbox': raise NotImplementedError("'dogbox' method not yet implemented") # if tr_solver == 'lsmr' and 'regularize' in tr_options: # warn("The keyword 'regularize' in `tr_options` is not relevant " # "for 'dogbox' method.") # tr_options = tr_options.copy() # del tr_options['regularize'] # result = dogbox(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol, # max_nfev, x_scale, tr_solver, tr_options, verbose) else: raise ValueError("`method` must be 'trf' or 'dogbox'.") result.message = TERMINATION_MESSAGES[result.status] result.success = result.status > 0 if verbose >= 1: print(result.message) print("Function evaluations {0}, initial cost {1:.4e}, final cost " "{2:.4e}, first-order optimality {3:.2e}." .format(result.nfev, initial_cost, result.cost, result.optimality)) return result