Source code for zfit.minimizers.minimizers_scipy

#  Copyright (c) 2024 zfit

from __future__ import annotations

import copy
import inspect
import math
from collections.abc import Callable, Mapping

import numpy as np
import scipy.optimize
from scipy.optimize import BFGS, HessianUpdateStrategy

from ..core.parameter import assign_values
from ..util.container import convert_to_container
from ..util.exception import MaximumIterationReached
from ..util.warnings import warn_experimental_feature
from .baseminimizer import (
    NOT_SUPPORTED,
    BaseMinimizer,
    minimize_supports,
    print_minimization_status,
)
from .fitresult import FitResult
from .strategy import ZfitStrategy
from .termination import CRITERION_NOT_AVAILABLE, ConvergenceCriterion


class ScipyBaseMinimizerV1(BaseMinimizer):
    _VALID_SCIPY_GRADIENT = None
    _VALID_SCIPY_HESSIAN = None

    def __init__(
        self,
        method: str,
        tol: float | None,
        internal_tol: Mapping[str, float | None],
        gradient: Callable | str | NOT_SUPPORTED | None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy | NOT_SUPPORTED),
        maxiter: int | str | None = None,
        minimizer_options: Mapping[str, object] | None = None,
        verbosity: int | None = None,
        strategy: ZfitStrategy | None = None,
        criterion: ConvergenceCriterion | None = None,
        minimize_func: callable | None = None,
        initializer: Callable | None = None,
        verbosity_setter: Callable | None = None,
        name: str = "ScipyMinimizer",
    ) -> None:
        """Base minimizer wrapping the SciPy librarys optimize module.

        To implement a subclass, inherit from this class and:
        - override ``_minimize`` (which has the same signature as :meth:`~BaseMinimizer.minimize`)
          and decorate it with ``minimize_supports``.
        - (optional) register the allowed methods for the gradient and hessian with
          ``Class._add_derivative_methods(...)``, as illustrated in the sketch below.

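        A minimal sketch of such a wrapper (illustrative only; the class name and the
        chosen options are placeholders that mirror, in simplified form, the concrete
        wrappers defined further below in this module)::

            class MyScipySLSQPV1(ScipyBaseMinimizerV1):
                def __init__(self, tol=None, gradient=None, verbosity=None, maxiter=None,
                             criterion=None, strategy=None, name="My SciPy SLSQP V1"):
                    # "ftol" is the internal SciPy tolerance that the outer loop tightens
                    super().__init__(
                        method="SLSQP",
                        internal_tol={"ftol": None},
                        gradient=gradient,
                        hessian=NOT_SUPPORTED,
                        tol=tol,
                        verbosity=verbosity,
                        maxiter=maxiter,
                        strategy=strategy,
                        criterion=criterion,
                        name=name,
                    )

            MyScipySLSQPV1._add_derivative_methods(
                gradient=["2-point", "3-point", None, True, False, "zfit"]
            )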

        Args:
            method: Name of the method as given to :func:`~scipy.optimize.minimize`
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            minimizer_options: Additional options forwarded to the wrapped SciPy
                   minimize function; the ``'options'`` entry is passed on as its
                   ``options`` argument.
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            minimize_func: Function that performs the actual minimization; defaults to
                   :func:`scipy.optimize.minimize`.
            initializer: Callable with signature ``(options, init, step_size)`` that can
                   update the SciPy ``options`` before each internal iteration, e.g. to
                   seed a trust-region radius from a previous result.
            verbosity_setter: Callable with signature ``(options, verbosity)`` that
                   translates the zfit verbosity into the minimizer-specific options.
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
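
        A hedged usage sketch (assumes an existing zfit loss ``nll``; any concrete
        wrapper defined below, e.g. ``ScipyLBFGSBV1``, is used the same way)::

            minimizer = ScipyLBFGSBV1(tol=1e-3, verbosity=5)
            result = minimizer.minimize(nll)
            print(result.valid, result.fmin)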
        """
        self._minimize_func = scipy.optimize.minimize if minimize_func is None else minimize_func

        if initializer is None:

            def initializer(options, init, step_size):
                del init, step_size
                return options

        if not callable(initializer):
            msg = f"Initializer has to be callable not {initializer}"
            raise TypeError(msg)
        self._scipy_initializer = initializer

        if verbosity_setter is None:

            def verbosity_setter(options, verbosity):
                del verbosity
                return options

        if not callable(verbosity_setter):
            msg = f"verbosity_setter has to be callable not {verbosity_setter}"
            raise TypeError(msg)
        self._scipy_verbosity_setter = verbosity_setter

        minimizer_options = {} if minimizer_options is None else minimizer_options
        minimizer_options = copy.copy(minimizer_options)
        minimizer_options["method"] = method

        if "options" not in minimizer_options:
            minimizer_options["options"] = {}

        if gradient in (True, "2-point", "3-point") and not (
            isinstance(hessian, HessianUpdateStrategy) or hessian is NOT_SUPPORTED
        ):
            msg = (
                "Whenever the gradient is estimated via finite-differences, "
                "the Hessian has to be estimated using one of the quasi-Newton strategies."
            )
            raise ValueError(msg)

        if gradient is not NOT_SUPPORTED:
            if self._VALID_SCIPY_GRADIENT is not None and gradient not in self._VALID_SCIPY_GRADIENT:
                msg = (
                    f"Requested gradient {gradient} is not a valid choice. Possible"
                    f" gradient methods are {self._VALID_SCIPY_GRADIENT}"
                )
                raise ValueError(msg)
            if gradient is False or gradient is None:
                gradient = "zfit"

            elif gradient is True:
                gradient = None
            minimizer_options["grad"] = gradient

        if hessian is not NOT_SUPPORTED:
            if self._VALID_SCIPY_HESSIAN is not None and hessian not in self._VALID_SCIPY_HESSIAN:
                msg = (
                    f"Requested hessian {hessian} is not a valid choice. Possible"
                    f" hessian methods are {self._VALID_SCIPY_HESSIAN}"
                )
                raise ValueError(msg)
            if isinstance(hessian, scipy.optimize.HessianUpdateStrategy) and not inspect.isclass(hessian):
                msg = (
                    "If `hesse` is a HessianUpdateStrategy, it has to be a class that takes `init_scale`,"
                    " not an instance. For further modification of other initial parameters, make a"
                    " subclass of the update strategy."
                )
                raise ValueError(msg)
            if hessian is True:
                hessian = None
            elif hessian is False or hessian is None:
                hessian = "zfit"
            minimizer_options["hess"] = hessian

        self._internal_tol = internal_tol
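        # controls for the outer loop in ``_minimize``: how often the SciPy call is
        # restarted with tightened tolerances and how many random perturbations are
        # tried when the EDM stalls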
        self._internal_maxiter = 20
        self._nrandom_max = 5
        super().__init__(
            name=name,
            tol=tol,
            verbosity=verbosity,
            minimizer_options=minimizer_options,
            strategy=strategy,
            criterion=criterion,
            maxiter=maxiter,
        )

    @classmethod
    def _add_derivative_methods(cls, gradient=None, hessian=None):
        gradient = convert_to_container(gradient, container=set)
        hessian = convert_to_container(hessian, container=set)
        if gradient is not None:
            if cls._VALID_SCIPY_GRADIENT is None:
                cls._VALID_SCIPY_GRADIENT = set()
            cls._VALID_SCIPY_GRADIENT.update(gradient)
        if hessian is not None:
            if cls._VALID_SCIPY_HESSIAN is None:
                cls._VALID_SCIPY_HESSIAN = set()
            cls._VALID_SCIPY_HESSIAN.update(hessian)

    @classmethod
    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        if cls._VALID_SCIPY_GRADIENT is not None:
            cls._VALID_SCIPY_GRADIENT = ScipyBaseMinimizerV1._VALID_SCIPY_GRADIENT.copy()
        if cls._VALID_SCIPY_HESSIAN is not None:
            cls._VALID_SCIPY_HESSIAN = ScipyBaseMinimizerV1._VALID_SCIPY_HESSIAN.copy()

    @minimize_supports(init=True)
    def _minimize(self, loss, params, init: FitResult):
        if init:
            assign_values(params=params, values=init)
        result_prelim = init

        evaluator = self.create_evaluator(loss=loss, params=params, numpy_converter=np.array)

        limits = [(float(p.lower), float(p.upper)) for p in params]
        init_values = np.array(params)

        minimizer_options = self.minimizer_options.copy()

        minimizer_options["bounds"] = limits

        use_gradient = "grad" in minimizer_options
        if use_gradient:
            gradient = minimizer_options.pop("grad")
            gradient = evaluator.gradient if gradient == "zfit" else gradient
            minimizer_options["jac"] = gradient

        use_hessian = "hess" in minimizer_options
        if use_hessian:
            hessian = minimizer_options.pop("hess")
            hessian = evaluator.hessian if hessian == "zfit" else hessian
            minimizer_options["hess"] = hessian

            is_update_strat = inspect.isclass(hessian) and issubclass(hessian, scipy.optimize.HessianUpdateStrategy)

            init_scale = "auto"
            # get possible initial step size from previous minimizer

        approx_step_sizes = None
        if init:
            approx_init_hesse = result_prelim.hesse(params=params, method="approx", name="approx")
            if approx_init_hesse:
                approx_step_sizes = [val["error"] for val in approx_init_hesse.values()] or None
        if approx_step_sizes is None:
            approx_step_sizes = np.array([0.1 if p.step_size is None else p.step_size for p in params])
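        # these step sizes seed the quasi-Newton ``init_scale`` and are handed to the
        # ``initializer`` hook of the concrete wrapper below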

        if (maxiter := self.get_maxiter(len(params))) is not None:
            # tell SciPy to stop 3 iterations early so the overall maxiter budget is not exceeded
            minimizer_options["options"]["maxiter"] = maxiter - 3 if maxiter > 10 else maxiter

        minimizer_options["options"]["disp"] = self.verbosity > 6
        minimizer_options["options"] = self._scipy_verbosity_setter(
            minimizer_options["options"], verbosity=self.verbosity
        )

        # tolerances and criterion
        criterion = self.create_criterion(loss, params)

        init_tol = min([math.sqrt(loss.errordef * self.tol), loss.errordef * self.tol * 1e3])
        internal_tol = self._internal_tol
        internal_tol = {tol: init_tol if init is None else init for tol, init in internal_tol.items()}
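        # the internal tolerances start loose and are tightened each outer iteration
        # via ``_update_tol_inplace`` until the convergence criterion is fulfilled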

        valid = None
        message = None
        optimize_results = None
        nrandom = 0
        old_edm = -1
        n_paramatlim = 0
        hessian_updater = None
        for i in range(self._internal_maxiter):
            minimizer_options["options"] = self._scipy_initializer(
                minimizer_options["options"],
                init=result_prelim,
                step_size=approx_step_sizes,
            )

            # update from previous run/result
            if use_hessian and is_update_strat:
                if not isinstance(init_scale, str):
                    init_scale = np.mean([approx for approx in approx_step_sizes if approx is not None])
                if i == 0:
                    hessian_updater = hessian(init_scale=init_scale)
                    minimizer_options["hess"] = hessian_updater
                else:
                    minimizer_options["hess"] = hessian_updater
            for tol, val in internal_tol.items():
                minimizer_options["options"][tol] = val

            # perform minimization
            optim_result = None
            try:
                optim_result = self._minimize_func(fun=evaluator.value, x0=init_values, **minimizer_options)
            except MaximumIterationReached as error:
                if optim_result is None:  # it didn't even run once
                    msg = (
                        "Maximum iteration reached on first wrapped minimizer call. This"
                        "is likely to a too low number of maximum iterations (currently"
                        f" {evaluator.maxiter}) or wrong internal tolerances, in which"
                        f" case: please fill an issue on github."
                    )
                    raise MaximumIterationReached(msg) from error
                maxiter_reached = True
                valid = False
                message = "Maxiter reached, terminated without convergence"
            else:
                maxiter_reached = evaluator.niter > evaluator.maxiter

            values = optim_result["x"]

            fmin = optim_result.fun
            assign_values(params, values)

            optimize_results = combine_optimize_results(
                [optim_result] if optimize_results is None else [optimize_results, optim_result]
            )
            result_prelim = FitResult.from_scipy(
                loss=loss,
                params=params,
                result=optimize_results,
                minimizer=self,
                edm=CRITERION_NOT_AVAILABLE,
                criterion=None,
                message="INTERNAL for Criterion",
                valid=valid,
            )
            if result_prelim.params_at_limit:
                n_paramatlim += 1
            approx_init_hesse = result_prelim.hesse(params=params, method="approx", name="approx")
            if approx_init_hesse:
                approx_step_sizes = [val["error"] for val in approx_init_hesse.values()] or None
            converged = criterion.converged(result_prelim)
            valid = converged
            edm = criterion.last_value

            if self.verbosity > 5:
                print_minimization_status(
                    converged=converged,
                    criterion=criterion,
                    evaluator=evaluator,
                    i=i,
                    fminopt=fmin,
                    internal_tol=internal_tol,
                )

            if math.isclose(old_edm, edm, rel_tol=1e-4, abs_tol=1e-12):
                if nrandom < self._nrandom_max:
                    # perturb the start values so the next attempt does not start too close to the stuck point
                    rnd_range = np.ones_like(values) if approx_step_sizes is None else approx_step_sizes
                    rnd_range_no_nan = np.nan_to_num(rnd_range, nan=1.0)
                    values += np.random.uniform(low=-rnd_range_no_nan, high=rnd_range_no_nan) / 5
                    nrandom += 1
                else:
                    message = f"Stuck (no change in a few iterations) at the edm={edm}"
                    valid = False
                    break
            old_edm = edm
            if n_paramatlim > 4:
                message = "Parameters too often at limit during minimization."
                break
            if converged or maxiter_reached:
                break
            init_values = values

            # update the tolerances
            self._update_tol_inplace(criterion_value=edm, internal_tol=internal_tol)

        else:
            message = f"Invalid, criterion {criterion.name} is {edm}, target {self.tol} not reached."
            valid = False
        return FitResult.from_scipy(
            loss=loss,
            params=params,
            result=optimize_results,
            minimizer=self,
            valid=valid,
            criterion=criterion,
            edm=edm,
            message=message,
            niter=evaluator.niter,
            evaluator=evaluator,
        )


class ScipyLBFGSBV1(ScipyBaseMinimizerV1):
    def __init__(
        self,
        tol: float | None = None,
        maxcor: int | None = None,
        maxls: int | None = None,
        verbosity: int | None = None,
        gradient: Callable | str | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy L-BFGS-B V1",
    ) -> None:
        """Local, gradient based quasi-Newton algorithm using the limited-memory BFGS approximation.

        Limited-memory BFGS is an optimization algorithm in the family of quasi-Newton methods
        that approximates the Broyden-Fletcher-Goldfarb-Shanno algorithm (BFGS) using a limited
        amount of memory (or gradients, controlled by *maxcor*).

        L-BFGS borrows ideas from the trust region methods while keeping the L-BFGS update
        of the Hessian and line search algorithms.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            maxcor: |@doc:minimizer.maxcor| Maximum number of memory history to keep
                   when using a quasi-Newton update formula such as BFGS.
                   It is the number of gradients to “remember” from previous
                   optimization steps: increasing it increases the memory
                   requirements but may speed up the convergence. |@docend:minimizer.maxcor|
            maxls: |@doc:minimizer.init.maxls| Maximum number of linesearch points. |@docend:minimizer.init.maxls|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
               Increasing the verbosity will gradually increase the output.
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the gradient provided by
                   the loss itself or a method from the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the evaluation
                   compared to a numerical method, but it may not always be possible.
                   In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
                   |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal
                   gradient estimation. This can be specified more clearly using the
                   arguments ``'2-point'`` and ``'3-point'``, which specify the
                   numerical algorithm the minimizer should use in order to
                   estimate the gradient. |@docend:minimizer.scipy.gradient.internal|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        if maxcor is not None:
            options["maxcor"] = maxcor
        if maxls is not None:
            options["maxls"] = maxls

        minimizer_options = {}
        if options:
            minimizer_options["options"] = options

        def verbosity_setter(options, verbosity):
            options.pop("disp", None)
            # negative is quiet, goes up to 100; starts to print at verbosity > 1
            options["iprint"] = int((verbosity - 2) * 12.5)
            return options

        scipy_tols = {"ftol": None, "gtol": None}

        super().__init__(
            method="L-BFGS-B",
            internal_tol=scipy_tols,
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            verbosity_setter=verbosity_setter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )

ScipyLBFGSBV1._add_derivative_methods( gradient=[ "2-point", "3-point", # 'cs' # works badly None, True, False, "zfit", ] ) class ScipyTrustKrylovV1(ScipyBaseMinimizerV1): @warn_experimental_feature def __init__( self, tol: float | None = None, inexact: bool | None = None, gradient: Callable | str | None = None, hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy trust-krylov V1", ) -> None: """PERFORMS POORLY! Local, gradient based (nearly) exact trust-region algorithm using matrix vector products with the hessian. |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| inexact: Accuracy to solve subproblems. If True requires less nonlinear iterations, but more vector products. gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation that the minimizer should use. This can be the gradient provided by the loss itself or method from the minimizer. In general, using the zfit provided automatic gradient is more precise and needs less computation time for the evaluation compared to a numerical method, but it may not always be possible. In this case, zfit switches to a generic, numerical gradient which in general performs worse than if the minimizer has its own numerical gradient. The following are possible choices: If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the loss (usually the automatic gradient) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient| |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal gradient estimation. This can be specified more clearly using the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical algorithm the minimizer should use in order to estimate the gradient. |@docend:minimizer.scipy.gradient.internal| hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation that the minimizer should use. This can be the hessian provided by the loss itself or method from the minimizer. While the exact gradient can speed up the convergence and is often beneficial, this ain't true for the computation of the (inverse) Hessian matrix. Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient) from the :math:`n` parameters, this can grow quite large and become computationally expensive. Therefore, many algorithms use an approximated (inverse) Hessian matrix making use of the gradient updates instead of calculating the exact matrix. This turns out to be precise enough and usually considerably speeds up the convergence. The following are possible choices: If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using automatic differentiation) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. 
The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ options = {} if inexact is not None: options["inexact"] = inexact minimizer_options = {} if options: minimizer_options["options"] = options scipy_tols = {"gtol": None} super().__init__( method="trust-krylov", internal_tol=scipy_tols, gradient=gradient, hessian=hessian, minimizer_options=minimizer_options, tol=tol, verbosity=verbosity, maxiter=maxiter, strategy=strategy, criterion=criterion, name=name, ) ScipyTrustKrylovV1._add_derivative_methods( gradient=[ "2-point", "3-point", # 'cs', # works badly None, True, False, "zfit", ], hessian=[ # '2-point', '3-point', # 'cs', # scipy.optimize.BFGS, scipy.optimize.SR1, None, # True, False, "zfit", ], ) class ScipyTrustNCGV1(ScipyBaseMinimizerV1): @warn_experimental_feature def __init__( self, tol: float | None = None, init_trust_radius: float | None = None, eta: float | None = None, max_trust_radius: int | None = None, gradient: Callable | str | None = None, hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy trust-ncg V1", ) -> None: """PERFORMS POORLY! Local Newton conjugate gradient trust-region algorithm. |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| eta: |@doc:minimizer.trust.eta| Trust region related acceptance stringency for proposed steps. |@docend:minimizer.trust.eta| init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius| max_trust_radius: |@doc:minimizer.trust.max_trust_radius| Maximum value of the trust-region radius. 
No steps that are longer than this value will be proposed. |@docend:minimizer.trust.max_trust_radius| gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation that the minimizer should use. This can be the gradient provided by the loss itself or method from the minimizer. In general, using the zfit provided automatic gradient is more precise and needs less computation time for the evaluation compared to a numerical method, but it may not always be possible. In this case, zfit switches to a generic, numerical gradient which in general performs worse than if the minimizer has its own numerical gradient. The following are possible choices: If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the loss (usually the automatic gradient) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient| hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation that the minimizer should use. This can be the hessian provided by the loss itself or method from the minimizer. While the exact gradient can speed up the convergence and is often beneficial, this ain't true for the computation of the (inverse) Hessian matrix. Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient) from the :math:`n` parameters, this can grow quite large and become computationally expensive. Therefore, many algorithms use an approximated (inverse) Hessian matrix making use of the gradient updates instead of calculating the exact matrix. This turns out to be precise enough and usually considerably speeds up the convergence. The following are possible choices: If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using automatic differentiation) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. 
|@docend:minimizer.name| """ options = {} if eta is not None: options["eta"] = eta if max_trust_radius is not None: options["max_trust_radius"] = max_trust_radius if init_trust_radius is not None: options["initial_trust_radius"] = init_trust_radius def initializer(options, init, step_size, **_): trust_radius = None if init is not None: trust_radius = init.info.get("tr_radius") elif step_size is not None: trust_radius = np.mean(step_size) if trust_radius is not None: options["initial_trust_radius"] = trust_radius return options if hessian is None: hessian = BFGS minimizer_options = {} if options: minimizer_options["options"] = options scipy_tols = {"gtol": None} super().__init__( method="trust-ncg", internal_tol=scipy_tols, gradient=gradient, hessian=hessian, minimizer_options=minimizer_options, tol=tol, verbosity=verbosity, maxiter=maxiter, initializer=initializer, strategy=strategy, criterion=criterion, name=name, ) ScipyTrustNCGV1._add_derivative_methods( gradient=[ # '2-point', '3-point', # 'cs' # works badly None, # True, False, "zfit", ], hessian=[ # '2-point', '3-point', # 'cs', # scipy.optimize.BFGS, scipy.optimize.SR1, None, # True, False, "zfit", ], )
[docs] class ScipyTrustConstrV1(ScipyBaseMinimizerV1): def __init__( self, tol: float | None = None, init_trust_radius: int | None = None, gradient: Callable | str | None = None, hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy trust-constr V1", ) -> None: """Trust-region based local minimizer. |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius| gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation that the minimizer should use. This can be the gradient provided by the loss itself or method from the minimizer. In general, using the zfit provided automatic gradient is more precise and needs less computation time for the evaluation compared to a numerical method, but it may not always be possible. In this case, zfit switches to a generic, numerical gradient which in general performs worse than if the minimizer has its own numerical gradient. The following are possible choices: If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the loss (usually the automatic gradient) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient| |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal gradient estimation. This can be specified more clearly using the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical algorithm the minimizer should use in order to estimate the gradient. |@docend:minimizer.scipy.gradient.internal| hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation that the minimizer should use. This can be the hessian provided by the loss itself or method from the minimizer. While the exact gradient can speed up the convergence and is often beneficial, this ain't true for the computation of the (inverse) Hessian matrix. Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient) from the :math:`n` parameters, this can grow quite large and become computationally expensive. Therefore, many algorithms use an approximated (inverse) Hessian matrix making use of the gradient updates instead of calculating the exact matrix. This turns out to be precise enough and usually considerably speeds up the convergence. The following are possible choices: If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using automatic differentiation) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian| |@doc:minimizer.scipy.hessian.internal| A :class:`~scipy.optimize.HessianUpdateStrategy` that holds an approximation of the hessian. For example :class:`~scipy.optimize.BFGS` (which performs usually best) or :class:`~scipy.optimize.SR1` (sometimes unstable updates). ``True`` (or ``None``; default) tells the minimizer to use its default internal hessian approximation. 
Arguments ``'2-point'`` and ``'3-point'`` specify which numerical algorithm the minimizer should use in order to estimate the hessian. This is only possible if the gradient is provided by zfit and not an internal numerical method is already used to determine it. |@docend:minimizer.scipy.hessian.internal| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ options = {} if init_trust_radius is not None: options["initial_tr_radius"] = init_trust_radius def initializer(options, init, step_size, **_): trust_radius = None if init is not None: trust_radius = init.info.get("tr_radius") elif step_size is not None: trust_radius = np.mean(step_size) if trust_radius is not None: options["initial_tr_radius"] = trust_radius return options def verbosity_setter(options, verbosity): options.pop("disp", None) if verbosity > 8: v = 3 elif verbosity > 6: v = 2 elif verbosity > 1: v = 1 else: v = 0 options["verbose"] = v # negative is quite, goes to 100. start at verbosity > 1 return options minimizer_options = {} if options: minimizer_options["options"] = options scipy_tols = {"gtol": None, "xtol": None} if hessian is None: hessian = BFGS super().__init__( method="trust-constr", internal_tol=scipy_tols, gradient=gradient, hessian=hessian, minimizer_options=minimizer_options, tol=tol, verbosity=verbosity, maxiter=maxiter, initializer=initializer, verbosity_setter=verbosity_setter, strategy=strategy, criterion=criterion, name=name, )
ScipyTrustConstrV1._add_derivative_methods( gradient=[ "2-point", "3-point", # 'cs', # works badly None, True, False, "zfit", ], hessian=[ "2-point", "3-point", # 'cs', scipy.optimize.BFGS, scipy.optimize.SR1, None, True, False, "zfit", ], ) class ScipyNewtonCGV1(ScipyBaseMinimizerV1): @warn_experimental_feature def __init__( self, tol: float | None = None, gradient: Callable | str | None = None, hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy Newton-CG V1", ) -> None: """WARNING! This algorithm seems unstable and may does not perform well! |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation that the minimizer should use. This can be the gradient provided by the loss itself or method from the minimizer. In general, using the zfit provided automatic gradient is more precise and needs less computation time for the evaluation compared to a numerical method, but it may not always be possible. In this case, zfit switches to a generic, numerical gradient which in general performs worse than if the minimizer has its own numerical gradient. The following are possible choices: If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the loss (usually the automatic gradient) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient| |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal gradient estimation. This can be specified more clearly using the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical algorithm the minimizer should use in order to estimate the gradient. |@docend:minimizer.scipy.gradient.internal| hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation that the minimizer should use. This can be the hessian provided by the loss itself or method from the minimizer. While the exact gradient can speed up the convergence and is often beneficial, this ain't true for the computation of the (inverse) Hessian matrix. Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient) from the :math:`n` parameters, this can grow quite large and become computationally expensive. Therefore, many algorithms use an approximated (inverse) Hessian matrix making use of the gradient updates instead of calculating the exact matrix. This turns out to be precise enough and usually considerably speeds up the convergence. The following are possible choices: If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using automatic differentiation) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian| |@doc:minimizer.scipy.hessian.internal| A :class:`~scipy.optimize.HessianUpdateStrategy` that holds an approximation of the hessian. 
For example :class:`~scipy.optimize.BFGS` (which performs usually best) or :class:`~scipy.optimize.SR1` (sometimes unstable updates). ``True`` (or ``None``; default) tells the minimizer to use its default internal hessian approximation. Arguments ``'2-point'`` and ``'3-point'`` specify which numerical algorithm the minimizer should use in order to estimate the hessian. This is only possible if the gradient is provided by zfit and not an internal numerical method is already used to determine it. |@docend:minimizer.scipy.hessian.internal| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ minimizer_options = {} if options := {}: minimizer_options["options"] = options scipy_tols = {"xtol": None} if hessian is None: hessian = BFGS method = "Newton-CG" super().__init__( method=method, internal_tol=scipy_tols, gradient=gradient, hessian=hessian, minimizer_options=minimizer_options, tol=tol, verbosity=verbosity, maxiter=maxiter, strategy=strategy, criterion=criterion, name=name, ) ScipyNewtonCGV1._add_derivative_methods( gradient=[ "2-point", "3-point", # 'cs', # works badly None, True, False, "zfit", ], hessian=[ "2-point", "3-point", # 'cs', scipy.optimize.BFGS, scipy.optimize.SR1, None, True, False, "zfit", ], )
[docs] class ScipyTruncNCV1(ScipyBaseMinimizerV1): def __init__( self, tol: float | None = None, maxcg: int | None = None, # maxCGit maxls: int | None = None, # stepmx eta: float | None = None, rescale: float | None = None, gradient: Callable | str | None = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy Truncated Newton Conjugate V1", ) -> None: """Local, gradient based minimization algorithm using a truncated Newton method. `Truncated Newton Methods <https://en.wikipedia.org/wiki/Truncated_Newton_method>`_ provide a hessian-free way of optimization. |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| maxcg: Maximum number of conjugate gradient evaluations (hessian*vector evaluations) per main iteration. If maxCGit == 0, the direction chosen is -gradient if maxCGit < 0, maxCGit is set to max(1,min(50,n/2)). Defaults to -1. maxls: Maximum step for the line search. May be increased during call. If too small, it will be set to 10.0. Defaults to 0. eta: Severity of the line search, should be between 0 and 1. rescale: Scaling factor (in log10) used to trigger loss value rescaling. If set to 0, rescale at each iteration. If it is a very large value, never rescale. gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation that the minimizer should use. This can be the gradient provided by the loss itself or method from the minimizer. In general, using the zfit provided automatic gradient is more precise and needs less computation time for the evaluation compared to a numerical method, but it may not always be possible. In this case, zfit switches to a generic, numerical gradient which in general performs worse than if the minimizer has its own numerical gradient. The following are possible choices: If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the loss (usually the automatic gradient) will be used; the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient| |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal gradient estimation. This can be specified more clearly using the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical algorithm the minimizer should use in order to estimate the gradient. |@docend:minimizer.scipy.gradient.internal| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. 
This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ options = {} if maxcg is not None: options["maxiter_cg"] = maxcg if eta is not None: options["eta"] = eta if maxls is not None: options["maxstep_ls"] = maxls if rescale is not None: options["rescale"] = rescale options["maxfun"] = None # in order to use maxiter minimizer_options = {} if options: minimizer_options["options"] = options def initializer(options, step_size, **_): if step_size is not None: options["scale"] = step_size return options scipy_tols = {"xtol": None, "ftol": None, "gtol": None} method = "TNC" super().__init__( method=method, tol=tol, verbosity=verbosity, strategy=strategy, gradient=gradient, hessian=NOT_SUPPORTED, criterion=criterion, internal_tol=scipy_tols, maxiter=maxiter, initializer=initializer, minimizer_options=minimizer_options, name=name, )
ScipyTruncNCV1._add_derivative_methods( gradient=[ "2-point", "3-point", # 'cs' # works badly None, True, False, "zfit", ] ) class ScipyDoglegV1(ScipyBaseMinimizerV1): def __init__( self, tol: float | None = None, init_trust_radius: int | None = None, eta: float | None = None, max_trust_radius: int | None = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy Dogleg V1", ) -> None: """This minimizer requires the hessian and gradient to be provided by the loss itself. Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| eta: |@doc:minimizer.trust.eta| Trust region related acceptance stringency for proposed steps. |@docend:minimizer.trust.eta| init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius| max_trust_radius: |@doc:minimizer.trust.max_max_trust_radius||@docend:minimizer.trust.max_max_trust_radius| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ options = {} if init_trust_radius is not None: options["initial_tr_radius"] = init_trust_radius if eta is not None: options["eta"] = eta if max_trust_radius is not None: options["max_trust_radius"] = max_trust_radius minimizer_options = {} if options: minimizer_options["options"] = options scipy_tols = {"gtol": None} super().__init__( method="dogleg", internal_tol=scipy_tols, gradient="zfit", hessian="zfit", minimizer_options=minimizer_options, tol=tol, verbosity=verbosity, maxiter=maxiter, strategy=strategy, criterion=criterion, name=name, ) ScipyDoglegV1._add_derivative_methods(gradient=["zfit"], hessian=["zfit"])
[docs] class ScipyPowellV1(ScipyBaseMinimizerV1): def __init__( self, tol: float | None = None, verbosity: int | None = None, maxiter: int | str | None = None, criterion: ConvergenceCriterion | None = None, strategy: ZfitStrategy | None = None, name: str = "SciPy Powell V1", ) -> None: """Local minimizer using the modified Powell algorithm. |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info| Args: tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion of the algorithm in order to determine if the minimum has been found. Defaults to 1e-3. |@docend:minimizer.tol| verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10. The verbosity has the meaning: - a value of 0 means quiet and no output - above 0 up to 5, information that is good to know but without flooding the user, corresponding to a "INFO" level. - A value above 5 starts printing out considerably more and is used more for debugging purposes. - Setting the verbosity to 10 will print out every evaluation of the loss function and gradient. Some minimizers offer additional output which is also distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity| maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds to roughly the maximum number of evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter| criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated measure for the distance to the minimum and can include the relative or absolute changes of the parameters, function value, gradients and more. If the value of the criterion is smaller than ``loss.errordef * tol``, the algorithm stopps and it is assumed that the minimum has been found. |@docend:minimizer.criterion| strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no input arguments in the init. Determines the behavior of the minimizer in certain situations, most notably when encountering NaNs. It can also implement a callback function. |@docend:minimizer.strategy| name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name| """ options = {} minimizer_options = {} if options: minimizer_options["options"] = options scipy_tols = {"xtol": None, "ftol": None} def initializer(options, init, **_): if init is not None: direc = init.info["original"].get("direc") if direc is not None: options["direc"] = direc return options method = "Powell" super().__init__( method=method, internal_tol=scipy_tols, gradient=NOT_SUPPORTED, hessian=NOT_SUPPORTED, minimizer_options=minimizer_options, tol=tol, maxiter=maxiter, initializer=initializer, verbosity=verbosity, strategy=strategy, criterion=criterion, name=name, )
class ScipySLSQPV1(ScipyBaseMinimizerV1):
    def __init__(
        self,
        tol: float | None = None,
        gradient: Callable | str | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy SLSQP V1",
    ) -> None:
        """Local, gradient-based minimizer using the Sequential Least Squares Programming algorithm.

        `Sequential Least Squares Programming <https://en.wikipedia.org/wiki/Sequential_quadratic_programming>`_
        is an iterative method for nonlinear parameter optimization.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the
                   gradient provided by the loss itself or a method from
                   the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the
                   evaluation compared to a numerical method, but it may not always be
                   possible. In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
                   |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal
                   gradient estimation. This can be specified more clearly using the
                   arguments ``'2-point'`` and ``'3-point'``, which specify the
                   numerical algorithm the minimizer should use in order to
                   estimate the gradient. |@docend:minimizer.scipy.gradient.internal|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to an "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative or absolute
                   changes of the parameters, function value,
                   gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init.
                   Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        minimizer_options = {}
        if options:
            minimizer_options["options"] = options

        scipy_tols = {"ftol": None}

        method = "SLSQP"
        super().__init__(
            method=method,
            internal_tol=scipy_tols,
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
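

# Usage sketch (illustration only, not part of the original module): the
# ``gradient`` argument of ``ScipySLSQPV1`` selects between the zfit-provided
# (typically automatic) gradient and SciPy's internal numerical estimates, as
# registered right below via ``_add_derivative_methods``. ``nll`` is assumed to
# be a zfit loss as in the earlier sketches.
def _example_scipy_slsqp(nll):  # pragma: no cover - illustrative sketch only
    # Gradient taken from the loss (default behaviour for None/False/"zfit").
    result_zfit_grad = ScipySLSQPV1(tol=1e-4, gradient="zfit").minimize(nll)

    # SciPy-internal numerical gradient using central differences.
    result_numerical_grad = ScipySLSQPV1(tol=1e-4, gradient="3-point").minimize(nll)
    return result_zfit_grad, result_numerical_grad

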
ScipySLSQPV1._add_derivative_methods(
    gradient=[
        "2-point",
        "3-point",
        # 'cs',  # works badly
        None,
        True,
        False,
        "zfit",
    ]
)


class ScipyCOBYLAV1(ScipyBaseMinimizerV1):
    @warn_experimental_feature
    def __init__(
        self,
        tol: float | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy COBYLA V1",
    ) -> None:
        """UNSTABLE! Local gradient-free downhill simplex-like method with an implicit linear approximation.

        COBYLA constructs successive linear approximations of the objective function and constraints via a
        simplex of n+1 points (in n dimensions) and optimizes these approximations in a trust region at
        each step.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to an "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative or absolute
                   changes of the parameters, function value,
                   gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        minimizer_options = {"options": options}

        scipy_tols = {"tol": None}

        method = "COBYLA"
        super().__init__(
            method=method,
            internal_tol=scipy_tols,
            gradient=NOT_SUPPORTED,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            maxiter=maxiter,
            verbosity=verbosity,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
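

# Usage sketch (illustration only, not part of the original module): the
# experimental ``ScipyCOBYLAV1`` is gradient-free, so no ``gradient`` argument
# is exposed; constructing it triggers the ``warn_experimental_feature``
# warning. ``nll`` is assumed to be a zfit loss as in the earlier sketches.
def _example_scipy_cobyla(nll):  # pragma: no cover - illustrative sketch only
    minimizer = ScipyCOBYLAV1(tol=1e-3, maxiter=5000)
    return minimizer.minimize(nll)

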
class ScipyNelderMeadV1(ScipyBaseMinimizerV1):
    def __init__(
        self,
        tol: float | None = None,
        adaptive: bool | None = True,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Nelder-Mead V1",
    ) -> None:
        """Local gradient-free downhill simplex method.

        `Nelder-Mead <https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method>`_ is a gradient-free method
        to minimize an objective function. Its performance is usually inferior to that of gradient-based
        algorithms.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            adaptive: Adapt the algorithm parameters to the dimensionality of the problem
                   (forwarded to SciPy's ``adaptive`` option); useful for high-dimensional
                   minimization.
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to an "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative or absolute
                   changes of the parameters, function value,
                   gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        minimizer_options = {}
        if adaptive is not None:
            options["adaptive"] = adaptive
        minimizer_options["options"] = options

        def initializer(options, init, **_):
            if init is not None:
                init_simplex = init.info["original"].get("final_simplex")
                if init_simplex is not None:
                    options["initial_simplex"] = init_simplex
            return options

        scipy_tols = {"fatol": None, "xatol": None}

        method = "Nelder-Mead"
        super().__init__(
            method=method,
            internal_tol=scipy_tols,
            gradient=NOT_SUPPORTED,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            maxiter=maxiter,
            initializer=initializer,
            verbosity=verbosity,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )


def combine_optimize_results(results):
    """Combine several SciPy ``OptimizeResult`` objects, keeping the last one and summing the counters."""
    if len(results) == 1:
        return results[0]
    result = results[-1]
    for field in ["nfev", "njev", "nhev", "nit"]:
        if field in result:
            result[field] = sum(res[field] for res in results)
    return result
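

# Small sketch (illustration only, not part of the original module) of what
# ``combine_optimize_results`` does: the last ``OptimizeResult`` is kept and its
# evaluation counters are replaced by the sums over all partial results, e.g.
# when a minimization has been restarted and the partial results are merged.
# It relies only on the module-level ``numpy`` and ``scipy.optimize`` imports.
def _example_combine_optimize_results():  # pragma: no cover - illustrative sketch only
    first = scipy.optimize.OptimizeResult(x=np.array([0.3]), fun=1.2, nfev=120, nit=15)
    second = scipy.optimize.OptimizeResult(x=np.array([0.1]), fun=0.9, nfev=80, nit=10)

    combined = combine_optimize_results([first, second])
    assert combined["nfev"] == 200  # 120 + 80
    assert combined["nit"] == 25  # 15 + 10
    return combined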