Source code for zfit.minimizers.minimizer_lm

#  Copyright (c) 2024 zfit

from __future__ import annotations

from collections.abc import Mapping

import numpy as np

import zfit.z.numpy as znp

from ..core.interfaces import ZfitLoss
from ..core.parameter import Parameter, assign_values
from ..util.cache import GraphCachable
from ..util.exception import MaximumIterationReached
from .baseminimizer import BaseMinimizer, minimize_supports
from .fitresult import Approximations, FitResult
from .strategy import ZfitStrategy
from .termination import ConvergenceCriterion


class OptimizeStop(Exception):
    pass


[docs] class LevenbergMarquardt(BaseMinimizer, GraphCachable): _DEFAULT_name = "LM" def __init__( self, tol: float | None = None, mode: int | None = None, rho_min: float | None = None, rho_max: float | None = None, verbosity: int | None = None, options: Mapping[str, object] | None = None, maxiter: int | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str | None = None, ): """Levenberg-Marquardt minimizer for general non-linear minimization by interpolating between Gauss-Newton and Gradient descent optimization. LM minimizes a function by iteratively solving a locally linearized version of the problem. Using the gradient (g) and the Hessian (H) of the loss function, the algorithm determines a step (h) that minimizes the loss function by solving :math:`Hh = g`. This works perfectly in one step for linear problems, however for non-linear problems it may be unstable far from the minimum. Thus a scalar damping parameter (L) is introduced and the Hessian is modified based on this damping. The form of the modification depends on the ``mode`` parameter, where the simplest example is :math:`H' = H + L \\mathbb{I}` (where :math:`\\mathbb{I}` is the identity). For a clarifying discussion of the LM algorithm see: Gavin, Henri 2016 (https://msulaiman.org/onewebmedia/LM%20Method%20matlab%20codes%20and%20implementation.pdf) Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| mode: The mode of the LM algorithm. The default mode (0) is the simplest form of the LM algorithm. Other modes may be implemented in the future. rho_min: The minimum acceptable value of the ratio of the actual reduction in the loss function to the expected reduction. If the ratio is less than this value, the damping parameter is increased. rho_max: The maximum acceptable value of the ratio of the actual reduction in the loss function to the expected reduction. If the ratio is greater than this value, the damping parameter is decreased. verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| options: |@doc:minimizer.options||@docend:minimizer.options| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name||@docend:minimizer.name """ mode = 0 if mode is None else mode if mode != 0: msg = "Only mode 0 is currently implemented" raise NotImplementedError(msg) rho_min = 0.1 if rho_min is None else rho_min if rho_min <= 0: msg = "rho_min has to be > 0" raise ValueError(msg) rho_max = 2.0 if rho_max is None else rho_max if rho_max <= 1: msg = "rho_max has to be > 1" raise ValueError(msg) self.mode = mode self.rho_min = rho_min self.rho_max = rho_max self.Lup = 11 self.Ldn = 9 super().__init__( name=name, strategy=strategy, tol=tol, verbosity=verbosity, criterion=criterion, maxiter=maxiter, minimizer_options=options, ) @staticmethod def _damped_hess0(hess, L): I = znp.eye(hess.shape[0]) # noqa: E741 D = znp.ones_like(hess) - I return hess * (I + D / (1 + L)) + L * I * (1 + znp.diag(hess)) def _mode0_step(self, loss, params, L): init_chi2, grad = loss.value_gradient(params) hess = loss.hessian(params) grad = grad[..., None] # right shape for the solve best = (znp.zeros_like(params), init_chi2, L) scary_best = (None, init_chi2, L) direction = "none" nostep = True for _ in range(10): # Determine damped hessian damped_hess = self._damped_hess0(hess, L) # Solve for step with minimum chi^2 h = znp.linalg.solve(damped_hess, -grad) # Calculate expected improvement in chi^2 expected_improvement = -znp.transpose(h) @ damped_hess @ h - 2 * znp.transpose(h) @ grad # Calculate new chi^2 params1 = params + h[:, 0] chi2 = loss.value(params1) # Skip if chi^2 is nan if not np.isfinite(chi2): L *= self.Lup if direction == "better": break direction = "worse" continue # Keep track of any chi^2 improvement if chi2 <= scary_best[1]: scary_best = (h, chi2, L) # Check if chi^2 improved the expected amount rho = (init_chi2 - chi2) / expected_improvement if rho < self.rho_min or rho > self.rho_max: L *= self.Lup if L > 1e9 or direction == "better": break direction = "worse" continue # Check for new best chi^2 if chi2 < best[1]: best = (h, chi2, L) L /= self.Ldn nostep = False if L < 1e-9 or direction == "worse": break direction = "better" elif chi2 >= best[1] and direction in ["worse", "none"] and L < 1e9: L *= self.Lup direction = "worse" continue else: # chi2 >= best[1] and direction == "better" break # Break if step already very successful (> 10% improvement) if (init_chi2 - best[1]) / init_chi2 > 0.1: break if nostep: # use any improvement if a safe improvement could not be found if scary_best[0] is not None: return scary_best msg = "No step found" raise OptimizeStop(msg) return {"step": best, "hessian": damped_hess, "gradient": grad[:, 0]} @minimize_supports(init=True) def _minimize(self, loss: ZfitLoss, params: list[Parameter], init): if init: assign_values(params=params, values=init) evaluator = self.create_evaluator(loss=loss, params=params) criterion = self.create_criterion(loss=loss, params=params) loss_history = [evaluator.value(params)] L = 1.0 success = False paramvals = znp.asarray(params) step = None approx = None iterator = range(self.get_maxiter(n=len(params))) for _niter in iterator: try: new_point = self._mode0_step(evaluator, paramvals, L) step = new_point["step"] except OptimizeStop: if self.verbosity >= 7: print(f"OptimizeStop, Iteration {_niter}, loss: {loss_history[-1]}") break L = step[2] loss_history.append(step[1]) paramvals += step[0][:, 0] approx = Approximations(params=params, gradient=new_point.get("gradient"), hessian=new_point.get("hessian")) tempres = FitResult( loss=loss, params=dict(zip(params, paramvals)), minimizer=self, valid=False, criterion=criterion, edm=-999, fminopt=loss_history[-1], approx=approx, ) success = criterion.converged(tempres) if self.verbosity >= 7: print( f"Iteration {_niter}, loss: {loss_history[-1]}, success: {success}, criterion: {criterion.last_value}" ) if success: if self.verbosity >= 6: print(f"Converged with criterion: {criterion.last_value} < {self.tol}") break else: msg = f"Maximum number of iterations ({self.maxiter}) reached." raise MaximumIterationReached(msg) msg = "No step taken. This should not happen?" assert step is not None, msg assign_values(params, paramvals) return FitResult( loss=loss, params={p: float(p.value()) for p in params}, edm=criterion.last_value, fminopt=loss_history[-1], minimizer=self, valid=success, criterion=criterion, approx=approx, evaluator=evaluator, )