Source code for torchmin.trustregion.krylov

"""
TODO: this module is not yet complete. It is not ready for use.
"""
import numpy as np
from scipy.linalg import eigh_tridiagonal, get_lapack_funcs
import torch

from .base import _minimize_trust_region, BaseQuadraticSubproblem


[docs]def _minimize_trust_krylov(fun, x0, **trust_region_options): """Minimization of scalar function of one or more variables using the GLTR Krylov subspace trust-region algorithm. .. warning:: This minimizer is in early stages and has not been rigorously tested. It may change in the near future. Parameters ---------- fun : callable Scalar objective function to minimize. x0 : Tensor Initialization point. initial_tr_radius : float Initial trust-region radius. max_tr_radius : float Maximum value of the trust-region radius. No steps that are longer than this value will be proposed. eta : float Trust region related acceptance stringency for proposed steps. gtol : float Gradient norm must be less than ``gtol`` before successful termination. Returns ------- result : OptimizeResult Result of the optimization routine. Notes ----- This trust-region solver is based on the GLTR algorithm as described in [1]_ and [2]_. References ---------- .. [1] F. Lenders, C. Kirches, and A. Potschka, "trlib: A vector-free implementation of the GLTR method for...", arXiv:1611.04718. .. [2] N. Gould, S. Lucidi, M. Roma, P. Toint: “Solving the Trust-Region Subproblem using the Lanczos Method”, SIAM J. Optim., 9(2), 504–525, 1999. .. [3] J. Nocedal and S. Wright, "Numerical optimization", Springer Science & Business Media. pp. 83-91, 2006. """ return _minimize_trust_region(fun, x0, subproblem=KrylovSubproblem, **trust_region_options)
class KrylovSubproblem(BaseQuadraticSubproblem): """The GLTR trust region sub-problem defined on an expanding Krylov subspace. Based on the implementation of GLTR described in [1]_. References ---------- .. [1] F. Lenders, C. Kirches, and A. Potschka, "trlib: A vector-free implementation of the GLTR method for...", arXiv:1611.04718. .. [2] N. Gould, S. Lucidi, M. Roma, P. Toint: “Solving the Trust-Region Subproblem using the Lanczos Method”, SIAM J. Optim., 9(2), 504–525, 1999. .. [3] J. Nocedal and S. Wright, "Numerical optimization", Springer Science & Business Media. pp. 83-91, 2006. """ hess_prod = True max_lanczos = None max_ms_iters = 500 # max iterations of the Moré-Sorensen loop def __init__(self, x, fun, k_easy=0.1, k_hard=0.2, tol=1e-5, ortho=True, debug=False): super().__init__(x, fun) self.eps = torch.finfo(x.dtype).eps self.k_easy = k_easy self.k_hard = k_hard self.tol = tol self.ortho = ortho self._debug = debug def tridiag_subproblem(self, Ta, Tb, tr_radius): """Solve the GLTR tridiagonal subproblem. Based on Algorithm 5.2 of [2]_. We factorize as follows: .. math:: T + lambd * I = LDL^T Where `D` is diagonal and `L` unit (lower) bi-diagonal. """ device, dtype = Ta.device, Ta.dtype # convert to numpy Ta = Ta.cpu().numpy() Tb = Tb.cpu().numpy() tr_radius = float(tr_radius) # right hand side rhs = np.zeros_like(Ta) rhs[0] = - float(self.jac_mag) # get LAPACK routines for factorizing and solving sym-PD tridiagonal ptsv, pttrs = get_lapack_funcs(('ptsv', 'pttrs'), (Ta, Tb, rhs)) eig0 = None lambd_lb = 0. lambd = 0. for _ in range(self.max_ms_iters): lambd = max(lambd, lambd_lb) # factor T + lambd * I = LDL^T and solve LDL^T p = rhs d, e, p, info = ptsv(Ta + lambd, Tb, rhs) assert info >= 0 # sanity check if info > 0: assert eig0 is None # sanity check; should only happen once # estimate smallest eigenvalue and continue eig0 = eigh_tridiagonal( Ta, Tb, eigvals_only=True, select='i', select_range=(0,0), lapack_driver='stebz').item() lambd_lb = max(1e-3 - eig0, 0) continue p_norm = np.linalg.norm(p) if p_norm < tr_radius: # TODO: add extra checks status = 0 break elif abs(p_norm - tr_radius) / tr_radius <= self.k_easy: status = 1 break # solve LDL^T q = p and compute <q, p> v, info = pttrs(d, e, p) q_norm2 = v.dot(p) # update lambd lambd += (p_norm**2 / q_norm2) * (p_norm - tr_radius) / tr_radius else: status = -1 p = torch.tensor(p, device=device, dtype=dtype) return p, status, lambd def solve(self, tr_radius): g = self.jac gamma_0 = self.jac_mag n, = g.shape m = n if self.max_lanczos is None else min(n, self.max_lanczos) dtype = g.dtype device = g.device h_best = None error_best = float('inf') # Lanczos Q matrix buffer Q = torch.zeros(m, n, dtype=dtype, device=device) Q[0] = g / gamma_0 # Lanczos T matrix buffers # a and b are the diagonal and off-diagonal entries of T, respectively a = torch.zeros(m, dtype=dtype, device=device) b = torch.zeros(m, dtype=dtype, device=device) # first lanczos iteration r = self.hessp(Q[0]) torch.dot(Q[0], r, out=a[0]) r.sub_(Q[0] * a[0]) torch.linalg.norm(r, out=b[0]) if b[0] < self.eps: raise RuntimeError('initial beta is zero.') # remaining iterations for i in range(1, m): torch.div(r, b[i-1], out=Q[i]) r = self.hessp(Q[i]) r.sub_(Q[i-1] * b[i-1]) torch.dot(Q[i], r, out=a[i]) r.sub_(Q[i] * a[i]) if self.ortho: # Re-orthogonalize with Gram-Schmidt r.addmv_(Q[:i+1].T, Q[:i+1].mv(r), alpha=-1) torch.linalg.norm(r, out=b[i]) if b[i] < self.eps: # This should never occur when self.ortho=True raise RuntimeError('reducible T matrix encountered.') # GLTR sub-problem h, status, lambd = self.tridiag_subproblem(a[:i+1], b[:i], tr_radius) if status >= 0: # convergence check; see Algorithm 1 of [1]_ and # Algorithm 5.1 of [2]_. Equivalent to the following: # p = Q[:i+1].T.mv(h) # error = torch.linalg.norm(self.hessp(p) + lambd * p + g) error = b[i] * h[-1].abs() if self._debug: print('iter %3d - status: %d - lambd: %0.4e - error: %0.4e' % (i+1, status, lambd, error)) if error < error_best: # we've found a new best hits_boundary = status != 0 h_best = h error_best = error if error_best <= self.tol: break elif self._debug: print('iter %3d - status: %d - lambd: %0.4e' % (i+1, status, lambd)) if h_best is None: # TODO: what should we do here? raise RuntimeError('gltr solution not found') # project h back to R^n p_best = Q[:i+1].T.mv(h_best) return p_best, hits_boundary