Source code for zfit.minimizers.minimizer_lm

#  Copyright (c) 2025 zfit

from __future__ import annotations

import typing
from collections.abc import Mapping

import numpy as np

import zfit.z.numpy as znp
from zfit._interfaces import ZfitLoss

from ..core.parameter import Parameter, assign_values
from ..util.cache import GraphCachable
from ..util.exception import MaximumIterationReached
from .baseminimizer import BaseMinimizer, minimize_supports
from .fitresult import Approximations, FitResult
from .strategy import ZfitStrategy
from .termination import ConvergenceCriterion

if typing.TYPE_CHECKING:
    import zfit  # noqa: F401


class OptimizeStop(Exception):
    pass


[docs] class LevenbergMarquardt(BaseMinimizer, GraphCachable): _DEFAULT_name = "LM" def __init__( self, tol: float | None = None, mode: int | None = None, rho_min: float | None = None, rho_max: float | None = None, verbosity: int | None = None, options: Mapping[str, object] | None = None, maxiter: int | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str | None = None, ): """Levenberg-Marquardt minimizer for general non-linear minimization by interpolating between Gauss-Newton and Gradient descent optimization. LM minimizes a function by iteratively solving a locally linearized version of the problem. Using the gradient (g) and the Hessian (H) of the loss function, the algorithm determines a step (h) that minimizes the loss function by solving :math:`Hh = g`. This works perfectly in one step for linear problems, however for non-linear problems it may be unstable far from the minimum. Thus a scalar damping parameter (L) is introduced and the Hessian is modified based on this damping. The form of the modification depends on the ``mode`` parameter, where the simplest example is :math:`H' = H + L \\mathbb{I}` (where :math:`\\mathbb{I}` is the identity). For a clarifying discussion of the LM algorithm see: Gavin, Henri 2016 (https://msulaiman.org/onewebmedia/LM%20Method%20matlab%20codes%20and%20implementation.pdf) Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| mode: The mode of the LM algorithm. The default mode (0) is the simplest form of the LM algorithm. Other modes may be implemented in the future. rho_min: The minimum acceptable value of the ratio of the actual reduction in the loss function to the expected reduction. If the ratio is less than this value, the damping parameter is increased. rho_max: The maximum acceptable value of the ratio of the actual reduction in the loss function to the expected reduction. If the ratio is greater than this value, the damping parameter is decreased. verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| options: |@doc:minimizer.init.options| Additional options for the minimizer. This is a dictionary of options that are passed to the minimizer. The options depend on the minimizer and are not always available. See the documentation of the minimizer for more information. |@docend:minimizer.init.options| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.init.name| Human-readable name of the minimizer. |@docend:minimizer.name| name: |@doc:minimizer.init.name||@docend:minimizer.name """ mode = 0 if mode is None else mode if mode != 0: msg = "Only mode 0 is currently implemented" raise NotImplementedError(msg) rho_min = 0.1 if rho_min is None else rho_min if rho_min <= 0: msg = "rho_min has to be > 0" raise ValueError(msg) rho_max = 2.0 if rho_max is None else rho_max if rho_max <= 1: msg = "rho_max has to be > 1" raise ValueError(msg) self.mode = mode self.rho_min = rho_min self.rho_max = rho_max self.Lup = 11 self.Ldn = 9 super().__init__( name=name, strategy=strategy, tol=tol, verbosity=verbosity, criterion=criterion, maxiter=maxiter, minimizer_options=options, ) @staticmethod def _damped_hess0(hess, L): identity = znp.eye(hess.shape[0]) D = znp.ones_like(hess) - identity return hess * (identity + D / (1 + L)) + L * identity * (1 + znp.diag(hess)) def _mode0_step(self, loss, params, L): init_chi2, grad = loss.value_gradient(params) hess = loss.hessian(params) grad = grad[..., None] # right shape for the solve best = (znp.zeros_like(params), init_chi2, L) scary_best = (None, init_chi2, L) direction = "none" nostep = True for _ in range(10): # Determine damped hessian damped_hess = self._damped_hess0(hess, L) # Solve for step with minimum chi^2 h = znp.linalg.solve(damped_hess, -grad) # Calculate expected improvement in chi^2 expected_improvement = -znp.transpose(h) @ damped_hess @ h - 2 * znp.transpose(h) @ grad # Calculate new chi^2 params1 = params + h[:, 0] chi2 = loss.value(params1) # Skip if chi^2 is nan if not np.isfinite(chi2): L *= self.Lup if direction == "better": break direction = "worse" continue # Keep track of any chi^2 improvement if chi2 <= scary_best[1]: scary_best = (h, chi2, L) # Check if chi^2 improved the expected amount rho = (init_chi2 - chi2) / expected_improvement if rho < self.rho_min or rho > self.rho_max: L *= self.Lup if L > 1e9 or direction == "better": break direction = "worse" continue # Check for new best chi^2 if chi2 < best[1]: best = (h, chi2, L) L /= self.Ldn nostep = False if L < 1e-9 or direction == "worse": break direction = "better" elif chi2 >= best[1] and direction in ["worse", "none"] and L < 1e9: L *= self.Lup direction = "worse" continue else: # chi2 >= best[1] and direction == "better" break # Break if step already very successful (> 10% improvement) if (init_chi2 - best[1]) / init_chi2 > 0.1: break if nostep: # use any improvement if a safe improvement could not be found if scary_best[0] is None: msg = "No step found" raise OptimizeStop(msg) best = scary_best return {"step": best, "hessian": damped_hess, "gradient": grad[:, 0]} @minimize_supports(init=True) def _minimize(self, loss: ZfitLoss, params: list[Parameter], init): if init: assign_values(params=params, values=init) evaluator = self.create_evaluator(loss=loss, params=params) criterion = self.create_criterion(loss=loss, params=params) loss_history = [evaluator.value(params)] L = 1.0 success = False paramvals = znp.asarray(params) step = None approx = None iterator = range(self.get_maxiter(n=len(params))) for _niter in iterator: try: new_point = self._mode0_step(evaluator, paramvals, L) step = new_point["step"] except OptimizeStop: if self.verbosity >= 7: print(f"OptimizeStop, Iteration {_niter}, loss: {loss_history[-1]}") break L = step[2] loss_history.append(step[1]) paramvals += step[0][:, 0] approx = Approximations(params=params, gradient=new_point.get("gradient"), hessian=new_point.get("hessian")) tempres = FitResult( loss=loss, params=dict(zip(params, paramvals, strict=True)), minimizer=self, valid=False, criterion=criterion, edm=-999, fminopt=loss_history[-1], approx=approx, ) success = criterion.converged(tempres) if self.verbosity >= 7: print( f"Iteration {_niter}, loss: {loss_history[-1]}, success: {success}, criterion: {criterion.last_value}" ) if success: if self.verbosity >= 6: print(f"Converged with criterion: {criterion.last_value} < {self.tol}") break else: msg = f"Maximum number of iterations ({self.maxiter}) reached." raise MaximumIterationReached(msg) msg = "No step taken. This should not happen?" assert step is not None, msg assign_values(params, paramvals) return FitResult( loss=loss, params={p: float(p.value()) for p in params}, edm=criterion.last_value, fminopt=loss_history[-1], minimizer=self, valid=success, criterion=criterion, approx=approx, evaluator=evaluator, )