Source code for torchmin.lstsq.least_squares

"""
Generic interface for nonlinear least-squares minimization.
"""
from warnings import warn
import numbers
import torch

from .trf import trf
from .common import EPS, in_bounds, make_strictly_feasible

__all__ = ['least_squares']


TERMINATION_MESSAGES = {
    -1: "Improper input parameters status returned from `leastsq`",
    0: "The maximum number of function evaluations is exceeded.",
    1: "`gtol` termination condition is satisfied.",
    2: "`ftol` termination condition is satisfied.",
    3: "`xtol` termination condition is satisfied.",
    4: "Both `ftol` and `xtol` termination conditions are satisfied."
}


def prepare_bounds(bounds, x0):
    n = x0.shape[0]
    def process(b):
        if isinstance(b, numbers.Number):
            return x0.new_full((n,), b)
        elif isinstance(b, torch.Tensor):
            if b.dim() == 0:
                return x0.new_full((n,), b)
            return b
        else:
            raise ValueError

    lb, ub = [process(b) for b in bounds]

    return lb, ub


def check_tolerance(ftol, xtol, gtol, method):
    def check(tol, name):
        if tol is None:
            tol = 0
        elif tol < EPS:
            warn("Setting `{}` below the machine epsilon ({:.2e}) effectively "
                 "disables the corresponding termination condition."
                 .format(name, EPS))
        return tol

    ftol = check(ftol, "ftol")
    xtol = check(xtol, "xtol")
    gtol = check(gtol, "gtol")

    if method == "lm" and (ftol < EPS or xtol < EPS or gtol < EPS):
        raise ValueError("All tolerances must be higher than machine epsilon "
                         "({:.2e}) for method 'lm'.".format(EPS))
    elif ftol < EPS and xtol < EPS and gtol < EPS:
        raise ValueError("At least one of the tolerances must be higher than "
                         "machine epsilon ({:.2e}).".format(EPS))

    return ftol, xtol, gtol


def check_x_scale(x_scale, x0):
    if isinstance(x_scale, str) and x_scale == 'jac':
        return x_scale
    try:
        x_scale = torch.as_tensor(x_scale)
        valid = x_scale.isfinite().all() and x_scale.gt(0).all()
    except (ValueError, TypeError):
        valid = False

    if not valid:
        raise ValueError("`x_scale` must be 'jac' or array_like with "
                         "positive numbers.")

    if x_scale.dim() == 0:
        x_scale = x0.new_full(x0.shape, x_scale)

    if x_scale.shape != x0.shape:
        raise ValueError("Inconsistent shapes between `x_scale` and `x0`.")

    return x_scale


[docs]def least_squares(
        fun, x0, bounds=None, method='trf', ftol=1e-8, xtol=1e-8,
        gtol=1e-8, x_scale=1.0, tr_solver='lsmr', tr_options=None,
        max_nfev=None, verbose=0):
    r"""Solve a nonlinear least-squares problem with bounds on the variables.

    Given the residual function
    :math:`f: \mathcal{R}^n \rightarrow \mathcal{R}^m`, `least_squares`
    finds a local minimum of the residual sum-of-squares (RSS) objective:

    .. math::
        x^* = \underset{x}{\operatorname{arg\,min\,}}
        \frac{1}{2} ||f(x)||_2^2 \quad \text{subject to} \quad lb \leq x \leq ub

    The solution is found using variants of the Gauss-Newton method, a
    modification of Newton's method tailored to RSS problems.

    Parameters
    ----------
    fun : callable
        Function which computes the vector of residuals, with the signature
        ``fun(x)``. The argument ``x`` passed to this
        function is a Tensor of shape (n,) (never a scalar, even for n=1).
        It must allocate and return a 1-D Tensor of shape (m,) or a scalar.
    x0 : Tensor or float
        Initial guess on independent variables, with shape (n,). If
        float, it will be treated as a 1-D Tensor with one element.
    bounds : 2-tuple of Tensor, optional
        Lower and upper bounds on independent variables. Defaults to no bounds.
        Each Tensor must match the size of `x0` or be a scalar, in the latter
        case a bound will be the same for all variables. Use ``inf`` with
        an appropriate sign to disable bounds on all or some variables.
    method : str, optional
        Algorithm to perform minimization. Default is 'trf'.

            * 'trf' : Trust Region Reflective algorithm, particularly suitable
              for large sparse problems with bounds. Generally robust method.
            * 'dogbox' : COMING SOON. dogleg algorithm with rectangular trust regions,
              typical use case is small problems with bounds. Not recommended
              for problems with rank-deficient Jacobian.
    ftol : float or None, optional
        Tolerance for termination by the change of the cost function. The
        optimization process is stopped when ``dF < ftol * F``,
        and there was an adequate agreement between a local quadratic model and
        the true model in the last step. If None, the termination by this
        condition is disabled. Default is 1e-8.
    xtol : float or None, optional
        Tolerance for termination by the change of the independent variables.
        Termination occurs when ``norm(dx) < xtol * (xtol + norm(x))``.
        If None, the termination by this condition is disabled. Default is 1e-8.
    gtol : float or None, optional
        Tolerance for termination by the norm of the gradient. Default is 1e-8.
        The exact condition depends on `method` used:

            * For 'trf' : ``norm(g_scaled, ord=inf) < gtol``, where
              ``g_scaled`` is the value of the gradient scaled to account for
              the presence of the bounds [STIR]_.
            * For 'dogbox' : ``norm(g_free, ord=inf) < gtol``, where
              ``g_free`` is the gradient with respect to the variables which
              are not in the optimal state on the boundary.
    x_scale : Tensor or 'jac', optional
        Characteristic scale of each variable. Setting `x_scale` is equivalent
        to reformulating the problem in scaled variables ``xs = x / x_scale``.
        An alternative view is that the size of a trust region along jth
        dimension is proportional to ``x_scale[j]``. Improved convergence may
        be achieved by setting `x_scale` such that a step of a given size
        along any of the scaled variables has a similar effect on the cost
        function. If set to 'jac', the scale is iteratively updated using the
        inverse norms of the columns of the Jacobian matrix (as described in
        [JJMore]_).
    max_nfev : None or int, optional
        Maximum number of function evaluations before the termination.
        Defaults to 100 * n.
    tr_solver : str, optional
        Method for solving trust-region subproblems.

            * 'exact' is suitable for not very large problems with dense
              Jacobian matrices. The computational complexity per iteration is
              comparable to a singular value decomposition of the Jacobian
              matrix.
            * 'lsmr' is suitable for problems with sparse and large Jacobian
              matrices. It uses an iterative procedure for finding a solution
              of a linear least-squares problem and only requires matrix-vector
              product evaluations.
    tr_options : dict, optional
        Keyword options passed to trust-region solver.

            * ``tr_solver='exact'``: `tr_options` are ignored.
            * ``tr_solver='lsmr'``: options for `scipy.sparse.linalg.lsmr`.
              Additionally,  ``method='trf'`` supports  'regularize' option
              (bool, default is True), which adds a regularization term to the
              normal equation, which improves convergence if the Jacobian is
              rank-deficient [Byrd]_ (eq. 3.4).
    verbose : int, optional
        Level of algorithm's verbosity.

            * 0 : work silently (default).
            * 1 : display a termination report.
            * 2 : display progress during iterations.

    Returns
    -------
    result : OptimizeResult
        Result of the optimization routine.

    References
    ----------
    .. [STIR] M. A. Branch, T. F. Coleman, and Y. Li, "A Subspace, Interior,
              and Conjugate Gradient Method for Large-Scale Bound-Constrained
              Minimization Problems," SIAM Journal on Scientific Computing,
              Vol. 21, Number 1, pp 1-23, 1999.
    .. [Byrd] R. H. Byrd, R. B. Schnabel and G. A. Shultz, "Approximate
              solution of the trust region problem by minimization over
              two-dimensional subspaces", Math. Programming, 40, pp. 247-263,
              1988.
    .. [JJMore] J. J. More, "The Levenberg-Marquardt Algorithm: Implementation
                and Theory," Numerical Analysis, ed. G. A. Watson, Lecture
                Notes in Mathematics 630, Springer Verlag, pp. 105-116, 1977.

    """
    if tr_options is None:
        tr_options = {}

    if method not in ['trf', 'dogbox']:
        raise ValueError("`method` must be 'trf' or 'dogbox'.")

    if tr_solver not in ['exact', 'lsmr', 'cgls']:
        raise ValueError("`tr_solver` must be one of {'exact', 'lsmr', 'cgls'}.")

    if verbose not in [0, 1, 2]:
        raise ValueError("`verbose` must be in [0, 1, 2].")

    if bounds is None:
        bounds = (-float('inf'), float('inf'))
    elif not (isinstance(bounds, (tuple, list)) and len(bounds) == 2):
        raise ValueError("`bounds` must be a tuple/list with 2 elements.")

    if max_nfev is not None and max_nfev <= 0:
        raise ValueError("`max_nfev` must be None or positive integer.")

    # initial point
    x0 = torch.atleast_1d(x0)
    if torch.is_complex(x0):
        raise ValueError("`x0` must be real.")
    elif x0.dim() > 1:
        raise ValueError("`x0` must have at most 1 dimension.")

    # bounds
    lb, ub = prepare_bounds(bounds, x0)
    if lb.shape != x0.shape or ub.shape != x0.shape:
        raise ValueError("Inconsistent shapes between bounds and `x0`.")
    elif torch.any(lb >= ub):
        raise ValueError("Each lower bound must be strictly less than each "
                         "upper bound.")
    elif not in_bounds(x0, lb, ub):
        raise ValueError("`x0` is infeasible.")

    # x_scale
    x_scale = check_x_scale(x_scale, x0)

    # tolerance
    ftol, xtol, gtol = check_tolerance(ftol, xtol, gtol, method)

    if method == 'trf':
        x0 = make_strictly_feasible(x0, lb, ub)

    def fun_wrapped(x):
        return torch.atleast_1d(fun(x))

    # check function
    f0 = fun_wrapped(x0)
    if f0.dim() != 1:
        raise ValueError("`fun` must return at most 1-d array_like. "
                         "f0.shape: {0}".format(f0.shape))
    elif not f0.isfinite().all():
        raise ValueError("Residuals are not finite in the initial point.")

    initial_cost = 0.5 * f0.dot(f0)

    if isinstance(x_scale, str) and x_scale == 'jac':
        raise ValueError("x_scale='jac' can't be used when `jac` "
                         "returns LinearOperator.")

    if method == 'trf':
        result = trf(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol,
                     max_nfev, x_scale, tr_solver, tr_options.copy(), verbose)
    elif method == 'dogbox':
        raise NotImplementedError("'dogbox' method not yet implemented")
        # if tr_solver == 'lsmr' and 'regularize' in tr_options:
        #     warn("The keyword 'regularize' in `tr_options` is not relevant "
        #          "for 'dogbox' method.")
        #     tr_options = tr_options.copy()
        #     del tr_options['regularize']
        # result = dogbox(fun_wrapped, x0, f0, lb, ub, ftol, xtol, gtol,
        #                 max_nfev, x_scale, tr_solver, tr_options, verbose)
    else:
        raise ValueError("`method` must be 'trf' or 'dogbox'.")

    result.message = TERMINATION_MESSAGES[result.status]
    result.success = result.status > 0

    if verbose >= 1:
        print(result.message)
        print("Function evaluations {0}, initial cost {1:.4e}, final cost "
              "{2:.4e}, first-order optimality {3:.2e}."
              .format(result.nfev, initial_cost, result.cost,
                      result.optimality))

    return result