Source code for torchmin.constrained.lbfgsb

import torch
from torch import Tensor
from scipy.optimize import OptimizeResult

from .._optimize import _status_message
from ..function import ScalarFunction


class L_BFGS_B:
    """Limited-memory BFGS Hessian approximation for bounded optimization.

    This class maintains the L-BFGS history and provides methods for
    computing search directions within bound constraints.
    """
    def __init__(self, x, history_size=10):
        self.y = []
        self.s = []
        self.rho = []
        self.theta = 1.0  # scaling factor
        self.history_size = history_size
        self.n_updates = 0

    def solve(self, grad, x, lb, ub, theta=None):
        """Compute search direction: -H * grad, respecting bounds.

        Parameters
        ----------
        grad : Tensor
            Current gradient
        x : Tensor
            Current point
        lb : Tensor
            Lower bounds
        ub : Tensor
            Upper bounds
        theta : float, optional
            Scaling factor. If None, uses stored value.

        Returns
        -------
        d : Tensor
            Search direction
        """
        if theta is not None:
            self.theta = theta

        mem_size = len(self.y)
        if mem_size == 0:
            # No history yet, use scaled steepest descent
            return grad.neg() * self.theta

        # Two-loop recursion
        alpha = torch.zeros(mem_size, dtype=grad.dtype, device=grad.device)
        q = grad.clone()

        # First loop: backward pass
        for i in reversed(range(mem_size)):
            alpha[i] = self.rho[i] * self.s[i].dot(q)
            q.add_(self.y[i], alpha=-alpha[i])

        # Apply initial Hessian approximation
        r = q * self.theta

        # Second loop: forward pass
        for i in range(mem_size):
            beta = self.rho[i] * self.y[i].dot(r)
            r.add_(self.s[i], alpha=alpha[i] - beta)

        return -r

    def update(self, s, y):
        """Update the L-BFGS history with new correction pair.

        Parameters
        ----------
        s : Tensor
            Step vector (x_new - x)
        y : Tensor
            Gradient difference (g_new - g)
        """
        # Check curvature condition
        sy = s.dot(y)
        if sy <= 1e-10:
            # Skip update if curvature is too small
            return False

        yy = y.dot(y)

        # Update scaling factor (theta = s'y / y'y)
        if yy > 1e-10:
            self.theta = sy / yy

        # Update history
        if len(self.y) >= self.history_size:
            self.y.pop(0)
            self.s.pop(0)
            self.rho.pop(0)

        self.y.append(y.clone())
        self.s.append(s.clone())
        self.rho.append(1.0 / sy)
        self.n_updates += 1

        return True


def _project_bounds(x, lb, ub):
    """Project x onto the box [lb, ub]."""
    return torch.clamp(x, lb, ub)


def _gradient_projection(x, g, lb, ub):
    """Compute the projected gradient.

    Returns the projected gradient and identifies the active set.
    """
    # Project gradient: if at bound and gradient points out, set to zero
    g_proj = g.clone()

    # At lower bound with positive gradient
    at_lb = (x <= lb + 1e-10) & (g > 0)
    g_proj[at_lb] = 0

    # At upper bound with negative gradient
    at_ub = (x >= ub - 1e-10) & (g < 0)
    g_proj[at_ub] = 0

    return g_proj


[docs]@torch.no_grad() def _minimize_lbfgsb( fun, x0, bounds=None, lr=1.0, history_size=10, max_iter=None, gtol=1e-5, ftol=1e-9, normp=float('inf'), callback=None, disp=0, return_all=False): """Minimize a scalar function with L-BFGS-B. L-BFGS-B [1]_ is a limited-memory quasi-Newton method for bound-constrained optimization. It extends L-BFGS to handle box constraints. Parameters ---------- fun : callable Scalar objective function to minimize. x0 : Tensor Initialization point. bounds : tuple of Tensor, optional Bounds for variables as (lb, ub) where lb and ub are Tensors of the same shape as x0. Use float('-inf') and float('inf') for unbounded variables. If None, equivalent to unbounded. lr : float Step size for parameter updates (used as initial step in line search). history_size : int History size for L-BFGS Hessian estimates. max_iter : int, optional Maximum number of iterations. Defaults to 200 * x0.numel(). gtol : float Termination tolerance on projected gradient norm. ftol : float Termination tolerance on function/parameter changes. normp : Number or str Norm type for termination conditions. Can be any value supported by torch.norm. callback : callable, optional Function to call after each iteration: callback(x). disp : int or bool Display (verbosity) level. Set to >0 to print status messages. return_all : bool, optional Set to True to return a list of the best solution at each iteration. Returns ------- result : OptimizeResult Result of the optimization routine. References ---------- .. [1] Byrd, R. H., Lu, P., Nocedal, J., & Zhu, C. (1995). A limited memory algorithm for bound constrained optimization. SIAM Journal on Scientific Computing, 16(5), 1190-1208. """ lr = float(lr) disp = int(disp) if max_iter is None: max_iter = x0.numel() * 200 # Set up bounds x = x0.detach().view(-1).clone(memory_format=torch.contiguous_format) n = x.numel() if bounds is None: lb = torch.full_like(x, float('-inf')) ub = torch.full_like(x, float('inf')) else: lb, ub = bounds lb = lb.detach().view(-1).clone(memory_format=torch.contiguous_format) ub = ub.detach().view(-1).clone(memory_format=torch.contiguous_format) if lb.shape != x.shape or ub.shape != x.shape: raise ValueError('Bounds must have the same shape as x0') # Project initial point onto feasible region x = _project_bounds(x, lb, ub) # Construct scalar objective function sf = ScalarFunction(fun, x0.shape) closure = sf.closure # Compute initial function and gradient f, g, _, _ = closure(x) if disp > 1: print('initial fval: %0.4f' % f) print('initial gnorm: %0.4e' % g.norm(p=normp)) if return_all: allvecs = [x.clone()] # Initialize L-BFGS approximation hess = L_BFGS_B(x, history_size) # Main iteration loop for n_iter in range(1, max_iter + 1): # ======================================== # Check projected gradient convergence # ======================================== g_proj = _gradient_projection(x, g, lb, ub) g_proj_norm = g_proj.norm(p=normp) if disp > 1: print('iter %3d - fval: %0.4f, gnorm: %0.4e' % (n_iter, f, g_proj_norm)) if g_proj_norm <= gtol: warnflag = 0 msg = _status_message['success'] break # ======================================== # Compute search direction # ======================================== # Use projected gradient for search direction computation # This ensures we only move in directions away from active constraints d = hess.solve(g_proj, x, lb, ub) # Ensure direction is a descent direction w.r.t. original gradient gtd = g.dot(d) if gtd > -1e-10: # Not a descent direction, use projected steepest descent d = -g_proj gtd = g.dot(d) # Find maximum step length that keeps us feasible alpha_max = 1.0 for i in range(x.numel()): if d[i] > 1e-10: # Moving toward upper bound if ub[i] < float('inf'): alpha_max = min(alpha_max, (ub[i] - x[i]) / d[i]) elif d[i] < -1e-10: # Moving toward lower bound if lb[i] > float('-inf'): alpha_max = min(alpha_max, (lb[i] - x[i]) / d[i]) # Take a step with line search on the feasible segment # Simple backtracking: try alpha_max, 0.5*alpha_max, etc. alpha = alpha_max for _ in range(10): x_new = x + alpha * d x_new = _project_bounds(x_new, lb, ub) f_new, g_new, _, _ = closure(x_new) # Armijo condition (sufficient decrease) if f_new <= f + 1e-4 * alpha * gtd: break alpha *= 0.5 else: # Line search failed, take a small step x_new = x + 0.01 * alpha_max * d x_new = _project_bounds(x_new, lb, ub) f_new, g_new, _, _ = closure(x_new) if return_all: allvecs.append(x_new.clone()) if callback is not None: if callback(x_new.view_as(x0)): warnflag = 5 msg = _status_message['callback_stop'] break # ======================================== # Update Hessian approximation # ======================================== s = x_new - x y = g_new - g # Update L-BFGS (skip if curvature condition fails) hess.update(s, y) # ======================================== # Check convergence by small progress # ======================================== # Convergence by insufficient progress (be more lenient than gtol) if (s.norm(p=normp) <= ftol) and ((f_new - f).abs() <= ftol): # Double check with projected gradient g_proj_new = _gradient_projection(x_new, g_new, lb, ub) if g_proj_new.norm(p=normp) <= gtol: warnflag = 0 msg = _status_message['success'] break # Check for precision loss if not f_new.isfinite(): warnflag = 2 msg = _status_message['pr_loss'] break # Update state f = f_new x = x_new g = g_new else: # Maximum iterations reached warnflag = 1 msg = _status_message['maxiter'] if disp: print(msg) print(" Current function value: %f" % f) print(" Iterations: %d" % n_iter) print(" Function evaluations: %d" % sf.nfev) result = OptimizeResult( fun=f, x=x.view_as(x0), grad=g.view_as(x0), status=warnflag, success=(warnflag == 0), message=msg, nit=n_iter, nfev=sf.nfev ) if return_all: result['allvecs'] = [v.view_as(x0) for v in allvecs] return result