Source code for torchmin.optim.minimizer

from functools import reduce
import torch
from torch.optim import Optimizer


class LinearOperator:
    """A generic linear operator to use with Minimizer"""
    def __init__(self, matvec, shape, dtype=torch.float, device=None):
        self.rmv = matvec
        self.mv = matvec
        self.shape = shape
        self.dtype = dtype
        self.device = device
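

# A minimal sketch (not part of the library) of how the LinearOperator above
# behaves: it simply exposes a matrix-free matvec through `.mv`/`.rmv`, which
# is how `Minimizer.closure` hands Hessian-vector products to the solvers
# that need them. The matrix `A` and the helper name `_linear_operator_example`
# are illustrative assumptions only.
def _linear_operator_example():
    A = torch.tensor([[2.0, 0.5],
                      [0.5, 1.0]])
    op = LinearOperator(lambda v: A @ v, shape=A.shape, dtype=A.dtype)
    v = torch.tensor([1.0, -1.0])
    # `mv` applies the operator; `rmv` is identical here because the
    # operator is symmetric.
    return op.mv(v), op.rmv(v)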


class Minimizer(Optimizer):
    """A general-purpose PyTorch optimizer for unconstrained function
    minimization.

    .. warning::
        This optimizer doesn't support per-parameter options and parameter
        groups (there can be only one).

    .. warning::
        Right now all parameters have to be on a single device. This will be
        improved in the future.

    Parameters
    ----------
    params : iterable
        An iterable of :class:`torch.Tensor` s. Specifies what Tensors
        should be optimized.
    method : str
        Minimization method (algorithm) to use. Must be one of the methods
        offered in :func:`torchmin.minimize()`. Defaults to 'bfgs'.
    **minimize_kwargs : dict
        Additional keyword arguments that will be passed to
        :func:`torchmin.minimize()`.
    """
    def __init__(self, params, method='bfgs', **minimize_kwargs):
        assert isinstance(method, str)
        method_ = method.lower()
        self._hessp = self._hess = False
        if method_ in ['bfgs', 'l-bfgs', 'cg']:
            pass
        elif method_ in ['newton-cg', 'trust-ncg', 'trust-krylov']:
            self._hessp = True
        elif method_ in ['newton-exact', 'dogleg', 'trust-exact']:
            self._hess = True
        else:
            raise ValueError('Unknown method {}'.format(method))

        defaults = dict(method=method_, **minimize_kwargs)
        super().__init__(params, defaults)

        if len(self.param_groups) != 1:
            raise ValueError("Minimizer doesn't support per-parameter options")

        self._nfev = [0]
        self._params = self.param_groups[0]['params']
        self._numel_cache = None
        self._closure = None
        self._result = None
    @property
    def nfev(self):
        return self._nfev[0]

    def _numel(self):
        if self._numel_cache is None:
            self._numel_cache = reduce(
                lambda total, p: total + p.numel(), self._params, 0)
        return self._numel_cache

    def _gather_flat_param(self):
        params = []
        for p in self._params:
            if p.data.is_sparse:
                p = p.data.to_dense().view(-1)
            else:
                p = p.data.view(-1)
            params.append(p)
        return torch.cat(params)

    def _gather_flat_grad(self):
        grads = []
        for p in self._params:
            if p.grad is None:
                g = p.new_zeros(p.numel())
            elif p.grad.is_sparse:
                g = p.grad.to_dense().view(-1)
            else:
                g = p.grad.view(-1)
            grads.append(g)
        return torch.cat(grads)

    def _set_flat_param(self, value):
        offset = 0
        for p in self._params:
            numel = p.numel()
            p.copy_(value[offset:offset+numel].view_as(p))
            offset += numel
        assert offset == self._numel()

    def closure(self, x):
        # Scalar-function interface consumed by `torchmin.minimize`: set the
        # flat parameter vector, evaluate the loss, and return the value,
        # gradient, and (if the method needs them) Hessian information.
        from torchmin.function import sf_value

        assert self._closure is not None
        self._set_flat_param(x)
        with torch.enable_grad():
            f = self._closure()
            f.backward(create_graph=self._hessp or self._hess)
            grad = self._gather_flat_grad()

        grad_out = grad.detach().clone()
        hessp = None
        hess = None
        if self._hessp or self._hess:
            grad_accum = grad.detach().clone()

            def hvp(v):
                # Hessian-vector product via double backward: back-propagating
                # through `grad` with vector `v` accumulates H @ v into the
                # parameters' .grad buffers; subtracting `grad_accum` isolates
                # the new contribution.
                assert v.shape == grad.shape
                grad.backward(gradient=v, retain_graph=True)
                output = self._gather_flat_grad().detach() - grad_accum
                grad_accum.add_(output)
                return output

            numel = self._numel()
            if self._hessp:
                hessp = LinearOperator(hvp, shape=(numel, numel),
                                       dtype=grad.dtype, device=grad.device)
            if self._hess:
                # Build the dense Hessian one column at a time from HVPs.
                eye = torch.eye(numel, dtype=grad.dtype, device=grad.device)
                hess = torch.zeros(numel, numel, dtype=grad.dtype,
                                   device=grad.device)
                for i in range(numel):
                    hess[i] = hvp(eye[i])

        return sf_value(f=f.detach(), grad=grad_out.detach(),
                        hessp=hessp, hess=hess)

    def dir_evaluate(self, x, t, d):
        # Evaluate the loss and gradient at `x + t * d` (used for line
        # searches), then restore the original parameters.
        from torchmin.function import de_value

        self._set_flat_param(x + d.mul(t))
        with torch.enable_grad():
            f = self._closure()
            f.backward()
        grad = self._gather_flat_grad()
        self._set_flat_param(x)

        return de_value(f=float(f), grad=grad)
    @torch.no_grad()
    def step(self, closure):
        """Perform an optimization step.

        The function "closure" should have a slightly different form vs. the
        PyTorch standard: namely, it should not include any `backward()`
        calls. Backward steps will be performed internally by the optimizer.

        >>> def closure():
        >>>    optimizer.zero_grad()
        >>>    output = model(input)
        >>>    loss = loss_fn(output, target)
        >>>    # loss.backward() <-- skip this step!
        >>>    return loss

        Parameters
        ----------
        closure : callable
            A function that re-evaluates the model and returns the loss.
        """
        from torchmin.minimize import minimize

        # sanity check
        assert len(self.param_groups) == 1

        # overwrite closure
        closure_ = closure

        def closure():
            self._nfev[0] += 1
            return closure_()
        self._closure = closure

        # get initial value
        x0 = self._gather_flat_param()

        # perform parameter update
        kwargs = {k: v for k, v in self.param_groups[0].items() if k != 'params'}
        self._result = minimize(self, x0, **kwargs)

        # set final value
        self._set_flat_param(self._result.x)

        return self._result.fun
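

# Hedged usage sketch, not part of the module: minimizing the Rosenbrock
# function over a single parameter tensor, following the closure pattern
# described in `Minimizer.step`. The parameter name `x`, the choice of
# 'newton-cg', and the objective are assumptions made for this example only.
if __name__ == '__main__':
    x = torch.zeros(2, requires_grad=True)
    optimizer = Minimizer([x], method='newton-cg')

    def closure():
        optimizer.zero_grad()
        # no loss.backward() here; Minimizer performs backward passes itself
        return (1 - x[0])**2 + 100 * (x[1] - x[0]**2)**2

    final_loss = optimizer.step(closure)
    print('final loss:', float(final_loss))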