# Copyright (c) 2024 zfit
from __future__ import annotations
import copy
import inspect
import math
from collections.abc import Callable, Mapping
import numpy as np
import scipy.optimize
from scipy.optimize import BFGS, HessianUpdateStrategy
from ..core.parameter import assign_values
from ..util.container import convert_to_container
from ..util.exception import MaximumIterationReached
from ..util.warnings import warn_experimental_feature
from .baseminimizer import (
NOT_SUPPORTED,
BaseMinimizer,
minimize_supports,
print_minimization_status,
)
from .fitresult import FitResult
from .strategy import ZfitStrategy
from .termination import CRITERION_NOT_AVAILABLE, ConvergenceCriterion
class ScipyBaseMinimizer(BaseMinimizer):
    # Allowed values for the ``gradient`` / ``hessian`` arguments of ``__init__``.
    # ``None`` means "no restriction"; subclasses register their allowed choices
    # through ``_add_derivative_methods``.
    _VALID_SCIPY_GRADIENT = None
    _VALID_SCIPY_HESSIAN = None

    def __init__(
        self,
        method: str,
        tol: float | None,
        internal_tol: Mapping[str, float | None],
        gradient: Callable | str | NOT_SUPPORTED | None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy | NOT_SUPPORTED),
        maxiter: int | str | None = None,
        minimizer_options: Mapping[str, object] | None = None,
        verbosity: int | None = None,
        strategy: ZfitStrategy | None = None,
        criterion: ConvergenceCriterion | None = None,
        minimize_func: callable | None = None,
        initializer: Callable | None = None,
        verbosity_setter: Callable | None = None,
        name: str = "ScipyMinimizer",
    ) -> None:
        """Base minimizer wrapping the SciPy library's optimize module.

        To implement a subclass, inherit from this class and:
        - override ``_minimize`` (which has the same signature as :meth:`~BaseMinimizer.minimize`)
          and decorate it with ``minimize_supports``.
        - (optional) add the allowed methods for gradients and hessian with Class._add_derivative_methods(...)

        Args:
            method: Name of the method as given to :func:`~scipy.optimize.minimize`
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            internal_tol: Mapping from the name of a solver tolerance option (for example ``"gtol"``)
                to its value. Entries that are ``None`` are replaced in ``_minimize`` by an initial
                tolerance computed from ``tol`` and ``loss.errordef``.
            gradient: Gradient choice. ``False`` and ``None`` are stored as ``'zfit'`` (the gradient
                of the evaluator is used), ``True`` is stored as ``None`` and anything else is stored
                unchanged; the stored value is handed to the minimize function as ``jac``.
                ``NOT_SUPPORTED`` skips the gradient handling entirely.
            hessian: Hessian choice. ``False`` and ``None`` are stored as ``'zfit'`` (the hessian of
                the evaluator is used), ``True`` is stored as ``None`` and anything else is stored
                unchanged; the stored value is handed to the minimize function as ``hess``.
                A ``HessianUpdateStrategy`` has to be given as a class, not as an instance.
                ``NOT_SUPPORTED`` skips the hessian handling entirely.
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            minimizer_options: Additional keyword arguments for the minimize function. The entry
                ``"options"`` (created if missing) holds the solver specific options.
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            minimize_func: Function performing the minimization, called as
                ``minimize_func(fun=..., x0=..., **minimizer_options)``.
                Defaults to :func:`scipy.optimize.minimize`.
            initializer: Callable ``(options, init, stepsize) -> options`` that is invoked before
                every internal run of the minimize function and can adjust the solver options.
                The default returns the options unchanged.
            verbosity_setter: Callable ``(options, verbosity) -> options`` that translates the
                verbosity into solver options. The default returns the options unchanged.
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|

        Raises:
            TypeError: If ``initializer`` or ``verbosity_setter`` is not callable.
            ValueError: If ``gradient`` or ``hessian`` is not an allowed choice, if a
                finite-difference gradient is combined with a hessian that is not a quasi-Newton
                update strategy, or if a ``HessianUpdateStrategy`` instance is given.
        """
        self._minimize_func = scipy.optimize.minimize if minimize_func is None else minimize_func

        if initializer is None:

            def initializer(options, init, stepsize):
                del init, stepsize
                return options

        if not callable(initializer):
            msg = f"Initializer has to be callable not {initializer}"
            raise TypeError(msg)
        self._scipy_initializer = initializer

        if verbosity_setter is None:

            def verbosity_setter(options, verbosity):
                del verbosity
                return options

        if not callable(verbosity_setter):
            msg = f"verbosity_setter has to be callable not {verbosity_setter}"
            raise TypeError(msg)
        self._scipy_verbosity_setter = verbosity_setter

        minimizer_options = {} if minimizer_options is None else minimizer_options
        minimizer_options = copy.copy(minimizer_options)
        minimizer_options["method"] = method
        if "options" not in minimizer_options:
            minimizer_options["options"] = {}

        # An update strategy has to be passed as a class (instances are rejected further
        # down), so the class form must count as a quasi-Newton strategy here as well.
        hessian_is_update_strategy = isinstance(hessian, HessianUpdateStrategy) or (
            inspect.isclass(hessian) and issubclass(hessian, HessianUpdateStrategy)
        )
        if gradient in (True, "2-point", "3-point") and not (
            hessian_is_update_strategy or hessian is NOT_SUPPORTED
        ):
            msg = (
                "Whenever the gradient is estimated via finite-differences, "
                "the Hessian has to be estimated using one of the quasi-Newton strategies."
            )
            raise ValueError(msg)

        if gradient is not NOT_SUPPORTED:
            if self._VALID_SCIPY_GRADIENT is not None and gradient not in self._VALID_SCIPY_GRADIENT:
                msg = (
                    f"Requested gradient {gradient} is not a valid choice. Possible"
                    f" gradient methods are {self._VALID_SCIPY_GRADIENT}"
                )
                raise ValueError(msg)
            if gradient is False or gradient is None:
                gradient = "zfit"
            elif gradient is True:
                gradient = None
            minimizer_options["grad"] = gradient

        if hessian is not NOT_SUPPORTED:
            if self._VALID_SCIPY_HESSIAN is not None and hessian not in self._VALID_SCIPY_HESSIAN:
                msg = (
                    f"Requested hessian {hessian} is not a valid choice. Possible"
                    f" hessian methods are {self._VALID_SCIPY_HESSIAN}"
                )
                raise ValueError(msg)
            if isinstance(hessian, scipy.optimize.HessianUpdateStrategy) and not inspect.isclass(hessian):
                msg = (
                    "If `hessian` is a HessianUpdateStrategy, it has to be a class that takes `init_scale`,"
                    " not an instance. For further modification of other initial parameters, make a"
                    " subclass of the update strategy."
                )
                raise ValueError(msg)
            if hessian is True:
                hessian = None
            elif hessian is False or hessian is None:
                hessian = "zfit"
            minimizer_options["hess"] = hessian

        self._internal_tol = internal_tol
        # Upper bound on how often the wrapped minimizer is restarted in ``_minimize``.
        self._internal_maxiter = 20
        # Upper bound on random restarts when the criterion value stops changing.
        self._nrandom_max = 5
        super().__init__(
            name=name,
            tol=tol,
            verbosity=verbosity,
            minimizer_options=minimizer_options,
            strategy=strategy,
            criterion=criterion,
            maxiter=maxiter,
        )

    @classmethod
    def _add_derivative_methods(cls, gradient=None, hessian=None):
        """Register additional allowed ``gradient`` and ``hessian`` choices on ``cls``.

        Args:
            gradient: Choice or collection of choices added to ``cls._VALID_SCIPY_GRADIENT``.
            hessian: Choice or collection of choices added to ``cls._VALID_SCIPY_HESSIAN``.
        """
        gradient = convert_to_container(gradient, container=set)
        hessian = convert_to_container(hessian, container=set)
        if gradient is not None:
            if cls._VALID_SCIPY_GRADIENT is None:
                cls._VALID_SCIPY_GRADIENT = set()
            cls._VALID_SCIPY_GRADIENT.update(gradient)
        if hessian is not None:
            if cls._VALID_SCIPY_HESSIAN is None:
                cls._VALID_SCIPY_HESSIAN = set()
            cls._VALID_SCIPY_HESSIAN.update(hessian)

    @classmethod
    def __init_subclass__(cls, **kwargs):
        """Give every subclass its own copy of the inherited derivative-method sets.

        Without the copy, ``_add_derivative_methods`` on a subclass would update the very
        same set object that its parent class uses.
        """
        super().__init_subclass__(**kwargs)
        # Copy what ``cls`` inherited. The attributes on ``ScipyBaseMinimizer`` itself stay
        # ``None``, so copying from there would fail with an AttributeError.
        if cls._VALID_SCIPY_GRADIENT is not None:
            cls._VALID_SCIPY_GRADIENT = cls._VALID_SCIPY_GRADIENT.copy()
        if cls._VALID_SCIPY_HESSIAN is not None:
            cls._VALID_SCIPY_HESSIAN = cls._VALID_SCIPY_HESSIAN.copy()

    @minimize_supports(init=True)
    def _minimize(self, loss, params, init: FitResult):
        """Minimize ``loss`` by repeatedly running the wrapped minimize function.

        The minimize function is run up to ``self._internal_maxiter`` times. After every run
        the convergence criterion is evaluated on a preliminary result; if it is not
        fulfilled, the internal tolerances are updated and the next run starts from the
        values found so far.

        Args:
            loss: Loss to minimize. ``loss.errordef`` enters the initial internal tolerance.
            params: Parameters to minimize; their ``lower`` and ``upper`` are passed as bounds.
            init: Previous result or ``None``. If truthy, its values are assigned to ``params``
                and its approximate hesse errors are used as initial stepsizes.

        Returns:
            The ``FitResult`` created with ``FitResult.from_scipy`` from the combined runs.

        Raises:
            MaximumIterationReached: If the wrapped minimize function raises it.
        """
        if init:
            assign_values(params=params, values=init)
        result_prelim = init

        evaluator = self.create_evaluator(loss=loss, params=params, numpy_converter=np.array)

        limits = [(float(p.lower), float(p.upper)) for p in params]
        init_values = np.array(params)

        minimizer_options = self.minimizer_options.copy()
        # The copy above is shallow. Copy the nested solver options too, otherwise the
        # per-call entries written below (maxiter, disp, tolerances, initializer output)
        # would be stored on the instance and leak into later ``minimize`` calls.
        minimizer_options["options"] = dict(minimizer_options["options"])
        minimizer_options["bounds"] = limits

        eval_func = evaluator.value

        use_gradient = "grad" in minimizer_options
        if use_gradient:
            gradient = minimizer_options.pop("grad")
            gradient = evaluator.gradient if gradient == "zfit" else gradient
            minimizer_options["jac"] = gradient

        use_hessian = "hess" in minimizer_options
        if use_hessian:
            hessian = minimizer_options.pop("hess")
            hessian = evaluator.hessian if hessian == "zfit" else hessian
            minimizer_options["hess"] = hessian

            is_update_strat = inspect.isclass(hessian) and issubclass(hessian, scipy.optimize.HessianUpdateStrategy)
            init_scale = "auto"

        # get possible initial step size from previous minimizer
        approx_stepsizes = None
        if init:
            approx_init_hesse = result_prelim.hesse(params=params, method="approx", name="approx")
            if approx_init_hesse:
                approx_stepsizes = [val["error"] for val in approx_init_hesse.values()] or None
        if approx_stepsizes is None:
            approx_stepsizes = np.array([0.1 if p.stepsize is None else p.stepsize for p in params])

        if (maxiter := self.get_maxiter(len(params))) is not None:
            # let the wrapped minimizer stop 3 iterations before our own limit
            minimizer_options["options"]["maxiter"] = maxiter - 3 if maxiter > 10 else maxiter

        minimizer_options["options"]["disp"] = self.verbosity > 6
        minimizer_options["options"] = self._scipy_verbosity_setter(
            minimizer_options["options"], verbosity=self.verbosity
        )

        # tolerances and criterion
        criterion = self.create_criterion(loss, params)
        init_tol = min([math.sqrt(loss.errordef * self.tol), loss.errordef * self.tol * 1e3])
        # tolerances that were not given explicitly start at ``init_tol``
        internal_tol = {
            tol_name: init_tol if tol_value is None else tol_value
            for tol_name, tol_value in self._internal_tol.items()
        }

        valid = None
        message = None
        optimize_results = None
        nrandom = 0
        old_edm = -1
        n_paramatlim = 0
        hessian_updater = None
        for i in range(self._internal_maxiter):
            minimizer_options["options"] = self._scipy_initializer(
                minimizer_options["options"],
                init=result_prelim,
                stepsize=approx_stepsizes,
            )

            # update from previous run/result
            if use_hessian and is_update_strat:
                # NOTE(review): ``init_scale`` starts as the string "auto" and is only
                # reassigned inside this branch, so the mean is never computed here.
                if not isinstance(init_scale, str):
                    init_scale = np.mean([approx for approx in approx_stepsizes if approx is not None])
                if i == 0:
                    # instantiate the update strategy once and reuse it in later runs
                    hessian_updater = hessian(init_scale=init_scale)
                    minimizer_options["hess"] = hessian_updater
                else:
                    minimizer_options["hess"] = hessian_updater

            for tol, val in internal_tol.items():
                minimizer_options["options"][tol] = val

            # perform minimization
            optim_result = None
            try:
                optim_result = self._minimize_func(fun=eval_func, x0=init_values, **minimizer_options)
            except MaximumIterationReached as error:
                # NOTE(review): ``optim_result`` is reset to ``None`` right before the
                # ``try``, so this condition holds whenever the exception is raised and
                # the three statements after the ``raise`` are currently never reached.
                if optim_result is None:  # it didn't even run once
                    msg = (
                        "Maximum iteration reached on first wrapped minimizer call. This"
                        " is likely due to a too low number of maximum iterations (currently"
                        f" {evaluator.maxiter}) or wrong internal tolerances, in which"
                        " case: please fill an issue on github."
                    )
                    raise MaximumIterationReached(msg) from error
                maxiter_reached = True
                valid = False
                message = "Maxiter reached, terminated without convergence"
            else:
                maxiter_reached = evaluator.niter > evaluator.maxiter

            values = optim_result["x"]
            fmin = optim_result.fun
            assign_values(params, values)

            optimize_results = combine_optimize_results(
                [optim_result] if optimize_results is None else [optimize_results, optim_result]
            )
            # preliminary result, only used to evaluate the criterion and the stepsizes
            result_prelim = FitResult.from_scipy(
                loss=loss,
                params=params,
                result=optimize_results,
                minimizer=self,
                edm=CRITERION_NOT_AVAILABLE,
                criterion=None,
                message="INTERNAL for Criterion",
                valid=valid,
            )
            if result_prelim.params_at_limit:
                n_paramatlim += 1
            approx_init_hesse = result_prelim.hesse(params=params, method="approx", name="approx")
            if approx_init_hesse:
                approx_stepsizes = [val["error"] for val in approx_init_hesse.values()] or None
            converged = criterion.converged(result_prelim)
            valid = converged
            edm = criterion.last_value

            if self.verbosity > 5:
                print_minimization_status(
                    converged=converged,
                    criterion=criterion,
                    evaluator=evaluator,
                    i=i,
                    fminopt=fmin,
                    internal_tol=internal_tol,
                )

            # the criterion value did not change: shift the start values randomly,
            # at most ``self._nrandom_max`` times, before giving up
            if math.isclose(old_edm, edm, rel_tol=1e-4, abs_tol=1e-12):
                if nrandom < self._nrandom_max:  # in order not to start too close
                    rnd_range = np.ones_like(values) if approx_stepsizes is None else approx_stepsizes
                    rnd_range_no_nan = np.nan_to_num(rnd_range, nan=1.0)
                    values += np.random.uniform(low=-rnd_range_no_nan, high=rnd_range_no_nan) / 5
                    nrandom += 1
                else:
                    message = f"Stuck (no change in a few iterations) at the edm={edm}"
                    valid = False
                    break
            old_edm = edm
            if n_paramatlim > 4:
                message = "Parameters too often at limit during minimization."
                break
            if converged or maxiter_reached:
                break
            init_values = values

            # update the tolerances
            self._update_tol_inplace(criterion_value=edm, internal_tol=internal_tol)
        else:
            # all internal runs were used without reaching convergence
            message = f"Invalid, criterion {criterion.name} is {edm}, target {self.tol} not reached."
            valid = False
        return FitResult.from_scipy(
            loss=loss,
            params=params,
            result=optimize_results,
            minimizer=self,
            valid=valid,
            criterion=criterion,
            edm=edm,
            message=message,
            niter=evaluator.niter,
            evaluator=evaluator,
        )
[docs]
class ScipyLBFGSB(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        maxcor: int | None = None,
        maxls: int | None = None,
        verbosity: int | None = None,
        gradient: Callable | str | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy L-BFGS-B ",
    ) -> None:
        """Local, gradient based quasi-Newton algorithm using the limited-memory BFGS approximation.

        Limited-memory BFGS is an optimization algorithm in the family of quasi-Newton methods
        that approximates the Broyden-Fletcher-Goldfarb-Shanno algorithm (BFGS) using a limited amount of
        memory (or gradients, controlled by *maxcor*).

        L-BFGS borrows ideas from the trust region methods while keeping the L-BFGS update
        of the Hessian and line search algorithms.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            maxcor: |@doc:minimizer.maxcor| Maximum number of memory history to keep
                   when using a quasi-Newton update formula such as BFGS.
                   It is the number of gradients
                   to “remember” from previous optimization
                   steps: increasing it increases
                   the memory requirements but may speed up the convergence. |@docend:minimizer.maxcor|
            maxls: |@doc:minimizer.init.maxls| Maximum number of linesearch points. |@docend:minimizer.init.maxls|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
                   Increasing the verbosity will gradually increase the output.
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the
                   gradient provided by the loss itself or
                   method from the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the
                   evaluation compared to a numerical method, but it may not always be
                   possible. In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
                   |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal
                   gradient estimation. This can be specified more clearly using the
                   arguments ``'2-point'`` and ``'3-point'``, which specify the
                   numerical algorithm the minimizer should use in order to
                   estimate the gradient. |@docend:minimizer.scipy.gradient.internal|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # Forward only the solver options that were explicitly requested.
        solver_options = {
            key: value for key, value in (("maxcor", maxcor), ("maxls", maxls)) if value is not None
        }
        minimizer_options = {"options": solver_options} if solver_options else {}

        def set_iprint(options, verbosity):
            # This solver is steered through ``iprint`` instead of ``disp``: the value is
            # negative (quiet) below verbosity 2 and reaches 100 at verbosity 10.
            options.pop("disp", None)
            options["iprint"] = int((verbosity - 2) * 12.5)
            return options

        super().__init__(
            method="L-BFGS-B",
            name=name,
            tol=tol,
            internal_tol={"ftol": None, "gtol": None},
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            maxiter=maxiter,
            minimizer_options=minimizer_options,
            verbosity=verbosity,
            verbosity_setter=set_iprint,
            strategy=strategy,
            criterion=criterion,
        )
# Gradient choices accepted by ScipyLBFGSB ('cs' is left out: it works badly).
ScipyLBFGSB._add_derivative_methods(gradient=["2-point", "3-point", None, True, False, "zfit"])
[docs]
class ScipyBFGS(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        c1: float | None = None,
        c2: float | None = None,
        verbosity: int | None = None,
        gradient: Callable | str | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str | None = None,
    ) -> None:
        """Local, gradient based quasi-Newton algorithm using the BFGS algorithm.

        BFGS, named after Broyden, Fletcher, Goldfarb, and Shanno, is a quasi-Newton method
        that approximates the Hessian matrix of the loss function using the gradients of the loss function.
        It stores an approximation of the inverse Hessian matrix and updates it at each iteration.
        For a limited memory version, which doesn't store the full matrix, see L-BFGS-B.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            c1: |@doc:minimizer.init.c1| The coefficient for the Wolfe condition for the Armijo rule.
                    The Armijo rule is a line search method that ensures that the step size
                    is not too large. This is also called the sufficient decrease condition,
                    which effectively provides an upper bound to the step size.
                    The value is constrained to be 0 < c1 < c2 < 1.
                    Defaults to 1e-4. |@docend:minimizer.init.c1|
            c2: |@doc:minimizer.init.c2| The coefficient for the Wolfe condition for the curvature rule.
                    The curvature rule is a line search method that ensures that the step size
                    is not too small. This is also called the curvature condition,
                    which effectively provides a lower bound to the step size.
                    The value is constrained to be 0 < c1 < c2 < 1.
                    Defaults to 0.4. |@docend:minimizer.init.c2|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
                   Increasing the verbosity will gradually increase the output.
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the
                   gradient provided by the loss itself or
                   method from the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the
                   evaluation compared to a numerical method, but it may not always be
                   possible. In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
                   |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal
                   gradient estimation. This can be specified more clearly using the
                   arguments ``'2-point'`` and ``'3-point'``, which specify the
                   numerical algorithm the minimizer should use in order to
                   estimate the gradient. |@docend:minimizer.scipy.gradient.internal|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        if name is None:
            name = "SciPy BFGS"

        # Both Wolfe coefficients are always forwarded, using the defaults if not given.
        options = {
            "c1": 1e-4 if c1 is None else c1,
            "c2": 0.4 if c2 is None else c2,
        }
        minimizer_options = {"options": options}

        def verbosity_setter(options, verbosity):
            # Start printing at verbosity 6. The comparison has to happen before any
            # conversion to bool: ``bool(verbosity - 6) >= 0`` is always True.
            options["disp"] = verbosity >= 6
            return options

        scipy_tols = {
            "gtol": None,
            # 'xrtol': None
        }

        def initializer(options, init: FitResult, stepsize, **_):
            hess_inv0 = None
            if init is not None:
                hess_inv0 = init.approx.inv_hessian()
            elif stepsize is not None:
                hess_inv0 = np.diag(np.array(stepsize) ** 2)
            # NOTE(review): handing the initial inverse hessian to the solver is switched
            # off (``if False``), so ``hess_inv0`` is computed but currently unused.
            if False:
                options["hess_inv0"] = hess_inv0
            return options

        super().__init__(
            method="BFGS",
            internal_tol=scipy_tols,
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            initializer=initializer,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            verbosity_setter=verbosity_setter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Gradient choices accepted by ScipyBFGS ('cs' is left out: it works badly).
ScipyBFGS._add_derivative_methods(gradient=["2-point", "3-point", None, True, False, "zfit"])
[docs]
class ScipyTrustKrylov(ScipyBaseMinimizer):
    @warn_experimental_feature
    def __init__(
        self,
        tol: float | None = None,
        inexact: bool | None = None,
        gradient: Callable | str | None = None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy trust-krylov ",
    ) -> None:
        """PERFORMS POORLY! Local, gradient based (nearly) exact trust-region algorithm using matrix vector products
        with the hessian.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            inexact: Accuracy to solve subproblems.
                If True requires less nonlinear iterations, but more vector products.
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the
                   gradient provided by the loss itself or
                   method from the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the
                   evaluation compared to a numerical method, but it may not always be
                   possible. In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
                   |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its default internal
                   gradient estimation. This can be specified more clearly using the
                   arguments ``'2-point'`` and ``'3-point'``, which specify the
                   numerical algorithm the minimizer should use in order to
                   estimate the gradient. |@docend:minimizer.scipy.gradient.internal|
            hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation
                   that the minimizer should use. This can be the
                   hessian provided by the loss itself or
                   method from the minimizer.

                   While the exact gradient can speed up the convergence and is
                   often beneficial, this ain't true for the computation of the
                   (inverse) Hessian matrix.
                   Due to the :math:`n^2` number of entries (compared to :math:`n` in the
                   gradient) from the :math:`n` parameters, this can grow quite
                   large and become computationally expensive.

                   Therefore, many algorithms use an approximated (inverse)
                   Hessian matrix making use of the gradient updates instead
                   of calculating the exact matrix. This turns out to be
                   precise enough and usually considerably speeds up the
                   convergence.

                   The following are possible choices:

                   If set to ``False`` or ``'zfit'``, the
                   hessian defined in the loss (usually using automatic differentiation)
                   will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # ``inexact`` is the only solver option; it is forwarded only if explicitly set.
        minimizer_options = {} if inexact is None else {"options": {"inexact": inexact}}

        super().__init__(
            method="trust-krylov",
            name=name,
            tol=tol,
            internal_tol={"gtol": None},
            gradient=gradient,
            hessian=hessian,
            maxiter=maxiter,
            minimizer_options=minimizer_options,
            verbosity=verbosity,
            strategy=strategy,
            criterion=criterion,
        )
# Derivative choices accepted by ScipyTrustKrylov.
# Not offered for the gradient: 'cs' (works badly).
# Not offered for the hessian: '2-point', '3-point', 'cs', scipy.optimize.BFGS,
# scipy.optimize.SR1 and True.
ScipyTrustKrylov._add_derivative_methods(
    gradient=["2-point", "3-point", None, True, False, "zfit"],
    hessian=[None, False, "zfit"],
)
[docs]
class ScipyTrustNCG(ScipyBaseMinimizer):
    @warn_experimental_feature
    def __init__(
        self,
        tol: float | None = None,
        init_trust_radius: float | None = None,
        eta: float | None = None,
        max_trust_radius: int | None = None,
        gradient: Callable | str | None = None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy trust-ncg ",
    ) -> None:
        """PERFORMS POORLY! Local Newton conjugate gradient trust-region algorithm.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            eta: |@doc:minimizer.trust.eta| Trust region related acceptance
                   stringency for proposed steps. |@docend:minimizer.trust.eta|
            init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius|
                   If not given, the mean of the parameter stepsizes is used. A ``tr_radius``
                   found in the info of a previous result takes precedence over both.
            max_trust_radius: |@doc:minimizer.trust.max_trust_radius| Maximum value of the trust-region radius.
                   No steps that are longer than this value will be proposed. |@docend:minimizer.trust.max_trust_radius|
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient computation
                   that the minimizer should use. This can be the
                   gradient provided by the loss itself or
                   method from the minimizer.
                   In general, using the zfit provided automatic gradient is
                   more precise and needs less computation time for the
                   evaluation compared to a numerical method, but it may not always be
                   possible. In this case, zfit switches to a generic, numerical gradient
                   which in general performs worse than if the minimizer has its own
                   numerical gradient.
                   The following are possible choices:

                   If set to ``False`` or ``'zfit'`` (or ``None``; default), the
                   gradient of the loss (usually the automatic gradient) will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.gradient|
            hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian computation
                   that the minimizer should use. This can be the
                   hessian provided by the loss itself or
                   method from the minimizer.

                   While the exact gradient can speed up the convergence and is
                   often beneficial, this ain't true for the computation of the
                   (inverse) Hessian matrix.
                   Due to the :math:`n^2` number of entries (compared to :math:`n` in the
                   gradient) from the :math:`n` parameters, this can grow quite
                   large and become computationally expensive.

                   Therefore, many algorithms use an approximated (inverse)
                   Hessian matrix making use of the gradient updates instead
                   of calculating the exact matrix. This turns out to be
                   precise enough and usually considerably speeds up the
                   convergence.

                   The following are possible choices:

                   If set to ``False`` or ``'zfit'``, the
                   hessian defined in the loss (usually using automatic differentiation)
                   will be used;
                   the minimizer won't use an internal algorithm. |@docend:minimizer.scipy.hessian|
                   If ``None`` (default), the ``BFGS`` update strategy of SciPy is used.
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, ``gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stops and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        if eta is not None:
            options["eta"] = eta
        if max_trust_radius is not None:
            options["max_trust_radius"] = max_trust_radius
        if init_trust_radius is not None:
            options["initial_trust_radius"] = init_trust_radius

        def initializer(options, init, stepsize, **_):
            trust_radius = None
            if init is not None:
                # continue with the radius reported by the previous result, if any
                trust_radius = init.info.get("tr_radius")
            elif stepsize is not None and init_trust_radius is None:
                # Estimate the radius from the stepsizes only when the user did not
                # request one; otherwise the explicit ``init_trust_radius`` would be
                # silently overwritten on every first run.
                trust_radius = np.mean(stepsize)
            if trust_radius is not None:
                options["initial_trust_radius"] = trust_radius
            return options

        if hessian is None:
            hessian = BFGS

        minimizer_options = {}
        if options:
            minimizer_options["options"] = options

        scipy_tols = {"gtol": None}

        super().__init__(
            method="trust-ncg",
            internal_tol=scipy_tols,
            gradient=gradient,
            hessian=hessian,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            initializer=initializer,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Register the gradient and hessian modes that ScipyTrustNCG accepts.
# The complex-step scheme 'cs' is intentionally not offered (it was noted to
# work badly for the gradient) and ``True`` is not enabled for this minimizer.
ScipyTrustNCG._add_derivative_methods(
    gradient=["2-point", "3-point", None, False, "zfit"],
    hessian=[
        "2-point",
        "3-point",
        scipy.optimize.BFGS,
        scipy.optimize.SR1,
        None,
        False,
        "zfit",
    ],
)
[docs]
class ScipyTrustConstr(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        init_trust_radius: int | None = None,
        gradient: Callable | str | None = None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy trust-constr ",
    ) -> None:
        """Trust-region based local minimizer.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius|
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient
                computation that the minimizer should use. This can be the gradient provided by
                the loss itself or a method from the minimizer.
                In general, using the zfit provided automatic gradient is more precise and needs
                less computation time for the evaluation compared to a numerical method, but it
                may not always be possible. In this case, zfit switches to a generic, numerical
                gradient which in general performs worse than if the minimizer has its own
                numerical gradient.
                The following are possible choices:

                If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the
                loss (usually the automatic gradient) will be used; the minimizer won't use an
                internal algorithm. |@docend:minimizer.scipy.gradient|
                |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its
                default internal gradient estimation. This can be specified more clearly using
                the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical
                algorithm the minimizer should use in order to estimate the
                gradient. |@docend:minimizer.scipy.gradient.internal|
            hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian
                computation that the minimizer should use. This can be the hessian provided by
                the loss itself or a method from the minimizer.
                While the exact gradient can speed up the convergence and is often beneficial,
                this is not true for the computation of the (inverse) Hessian matrix.
                Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient)
                from the :math:`n` parameters, this can grow quite large and become
                computationally expensive.
                Therefore, many algorithms use an approximated (inverse) Hessian matrix making
                use of the gradient updates instead of calculating the exact matrix. This turns
                out to be precise enough and usually considerably speeds up the convergence.
                The following are possible choices:

                If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using
                automatic differentiation) will be used; the minimizer won't use an internal
                algorithm. |@docend:minimizer.scipy.hessian|
                |@doc:minimizer.scipy.hessian.internal| A
                :class:`~scipy.optimize.HessianUpdateStrategy` that holds an approximation of the
                hessian. For example :class:`~scipy.optimize.BFGS` (which usually performs best)
                or :class:`~scipy.optimize.SR1` (sometimes unstable updates).
                ``True`` (or ``None``; default) tells the minimizer to use its default internal
                hessian approximation.
                Arguments ``'2-point'`` and ``'3-point'`` specify which numerical algorithm the
                minimizer should use in order to estimate the hessian. This is only possible if
                the gradient is provided by zfit and an internal numerical method is not
                already used to determine it. |@docend:minimizer.scipy.hessian.internal|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        if hessian is None:
            # Nothing requested: fall back to the BFGS hessian approximation.
            hessian = BFGS

        # Only forward the trust radius when the user actually provided one.
        options = {} if init_trust_radius is None else {"initial_tr_radius": init_trust_radius}
        minimizer_options = {"options": options} if options else {}

        def initializer(options, init, stepsize, **_):
            # Prefer the radius stored in ``init.info`` (if an init is given),
            # otherwise derive one from the mean of the step sizes.
            if init is not None:
                radius = init.info.get("tr_radius")
            elif stepsize is not None:
                radius = np.mean(stepsize)
            else:
                radius = None
            if radius is not None:
                options["initial_tr_radius"] = radius
            return options

        def verbosity_setter(options, verbosity):
            # This solver is configured via ``verbose`` instead of ``disp``.
            options.pop("disp", None)
            # Thresholds are checked from the highest down; verbosity 0-1 stays at level 0.
            for threshold, level in ((8, 3), (6, 2), (1, 1)):
                if verbosity > threshold:
                    break
            else:
                level = 0
            options["verbose"] = level
            return options

        super().__init__(
            method="trust-constr",
            internal_tol={"gtol": None, "xtol": None},
            gradient=gradient,
            hessian=hessian,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            initializer=initializer,
            verbosity_setter=verbosity_setter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Register the gradient and hessian modes that ScipyTrustConstr accepts.
# The complex-step scheme 'cs' is intentionally not offered (it was noted to
# work badly for the gradient).
ScipyTrustConstr._add_derivative_methods(
    gradient=["2-point", "3-point", None, True, False, "zfit"],
    hessian=[
        "2-point",
        "3-point",
        scipy.optimize.BFGS,
        scipy.optimize.SR1,
        None,
        True,
        False,
        "zfit",
    ],
)
class ScipyNewtonCG(ScipyBaseMinimizer):
    @warn_experimental_feature
    def __init__(
        self,
        tol: float | None = None,
        gradient: Callable | str | None = None,
        hessian: None | (Callable | str | scipy.optimize.HessianUpdateStrategy) = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Newton-CG ",
    ) -> None:
        """WARNING! This algorithm seems unstable and may not perform well!

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient
                computation that the minimizer should use. This can be the gradient provided by
                the loss itself or a method from the minimizer.
                In general, using the zfit provided automatic gradient is more precise and needs
                less computation time for the evaluation compared to a numerical method, but it
                may not always be possible. In this case, zfit switches to a generic, numerical
                gradient which in general performs worse than if the minimizer has its own
                numerical gradient.
                The following are possible choices:

                If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the
                loss (usually the automatic gradient) will be used; the minimizer won't use an
                internal algorithm. |@docend:minimizer.scipy.gradient|
                |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its
                default internal gradient estimation. This can be specified more clearly using
                the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical
                algorithm the minimizer should use in order to estimate the
                gradient. |@docend:minimizer.scipy.gradient.internal|
            hessian: |@doc:minimizer.scipy.hessian| Define the method to use for the hessian
                computation that the minimizer should use. This can be the hessian provided by
                the loss itself or a method from the minimizer.
                While the exact gradient can speed up the convergence and is often beneficial,
                this is not true for the computation of the (inverse) Hessian matrix.
                Due to the :math:`n^2` number of entries (compared to :math:`n` in the gradient)
                from the :math:`n` parameters, this can grow quite large and become
                computationally expensive.
                Therefore, many algorithms use an approximated (inverse) Hessian matrix making
                use of the gradient updates instead of calculating the exact matrix. This turns
                out to be precise enough and usually considerably speeds up the convergence.
                The following are possible choices:

                If set to ``False`` or ``'zfit'``, the hessian defined in the loss (usually using
                automatic differentiation) will be used; the minimizer won't use an internal
                algorithm. |@docend:minimizer.scipy.hessian|
                |@doc:minimizer.scipy.hessian.internal| A
                :class:`~scipy.optimize.HessianUpdateStrategy` that holds an approximation of the
                hessian. For example :class:`~scipy.optimize.BFGS` (which usually performs best)
                or :class:`~scipy.optimize.SR1` (sometimes unstable updates).
                ``True`` (or ``None``; default) tells the minimizer to use its default internal
                hessian approximation.
                Arguments ``'2-point'`` and ``'3-point'`` specify which numerical algorithm the
                minimizer should use in order to estimate the hessian. This is only possible if
                the gradient is provided by zfit and an internal numerical method is not
                already used to determine it. |@docend:minimizer.scipy.hessian.internal|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # This wrapper exposes no solver-specific options, so nothing extra is
        # forwarded to the base class.
        minimizer_options = {}

        if hessian is None:
            # Nothing requested: fall back to the BFGS hessian approximation.
            hessian = BFGS

        super().__init__(
            method="Newton-CG",
            internal_tol={"xtol": None},
            gradient=gradient,
            hessian=hessian,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Register the gradient and hessian modes that ScipyNewtonCG accepts.
# The complex-step scheme 'cs' is intentionally not offered (it was noted to
# work badly for the gradient).
ScipyNewtonCG._add_derivative_methods(
    gradient=["2-point", "3-point", None, True, False, "zfit"],
    hessian=[
        "2-point",
        "3-point",
        scipy.optimize.BFGS,
        scipy.optimize.SR1,
        None,
        True,
        False,
        "zfit",
    ],
)
[docs]
class ScipyTruncNC(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        maxcg: int | None = None,  # maxCGit
        maxls: int | None = None,  # stepmx
        eta: float | None = None,
        rescale: float | None = None,
        gradient: Callable | str | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Truncated Newton Conjugate ",
    ) -> None:
        """Local, gradient based minimization algorithm using a truncated Newton method.

        `Truncated Newton Methods <https://en.wikipedia.org/wiki/Truncated_Newton_method>`_ provide
        a hessian-free way of optimization.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            maxcg: Maximum number of conjugate gradient evaluations (hessian*vector evaluations)
                per main iteration; forwarded as the ``maxCGit`` solver option. If
                ``maxCGit == 0``, the direction chosen is -gradient; if ``maxCGit < 0``,
                ``maxCGit`` is set to ``max(1, min(50, n/2))``. Defaults to -1.
            maxls: Maximum step for the line search; forwarded as the ``stepmx`` solver option.
                May be increased during call. If too small, it will be set to 10.0.
                Defaults to 0.
            eta: Severity of the line search, should be between 0 and 1.
            rescale: Scaling factor (in log10) used to trigger loss value rescaling.
                If set to 0, rescale at each iteration.
                If it is a very large value, never rescale.
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient
                computation that the minimizer should use. This can be the gradient provided by
                the loss itself or a method from the minimizer.
                In general, using the zfit provided automatic gradient is more precise and needs
                less computation time for the evaluation compared to a numerical method, but it
                may not always be possible. In this case, zfit switches to a generic, numerical
                gradient which in general performs worse than if the minimizer has its own
                numerical gradient.
                The following are possible choices:

                If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the
                loss (usually the automatic gradient) will be used; the minimizer won't use an
                internal algorithm. |@docend:minimizer.scipy.gradient|
                |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its
                default internal gradient estimation. This can be specified more clearly using
                the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical
                algorithm the minimizer should use in order to estimate the
                gradient. |@docend:minimizer.scipy.gradient.internal|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # Translate the zfit argument names into the option names that SciPy's
        # TNC solver documents (``maxCGit``, ``stepmx``, ``eta``, ``rescale``);
        # keys it does not know would not take effect.
        options = {}
        if maxcg is not None:
            options["maxCGit"] = maxcg
        if eta is not None:
            options["eta"] = eta
        if maxls is not None:
            options["stepmx"] = maxls
        if rescale is not None:
            options["rescale"] = rescale
        options["maxfun"] = None  # in order to use maxiter

        # ``maxfun`` is always set, so the options dict is never empty.
        minimizer_options = {"options": options}

        def initializer(options, stepsize, **_):
            # Use the step sizes, when available, as the solver's ``scale`` option.
            if stepsize is not None:
                options["scale"] = stepsize
            return options

        super().__init__(
            method="TNC",
            tol=tol,
            verbosity=verbosity,
            strategy=strategy,
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            criterion=criterion,
            internal_tol={"xtol": None, "ftol": None, "gtol": None},
            maxiter=maxiter,
            initializer=initializer,
            minimizer_options=minimizer_options,
            name=name,
        )
# Register the gradient modes that ScipyTruncNC accepts. The complex-step
# scheme 'cs' is intentionally not offered (it was noted to work badly).
ScipyTruncNC._add_derivative_methods(gradient=["2-point", "3-point", None, True, False, "zfit"])
[docs]
class ScipyDogleg(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        init_trust_radius: int | None = None,
        eta: float | None = None,
        max_trust_radius: int | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Dogleg ",
    ) -> None:
        """This minimizer requires the hessian and gradient to be provided by the loss itself.

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            eta: |@doc:minimizer.trust.eta| Trust region related acceptance
                stringency for proposed steps. |@docend:minimizer.trust.eta|
            init_trust_radius: |@doc:minimizer.trust.init_trust_radius| Initial trust-region radius. |@docend:minimizer.trust.init_trust_radius|
            max_trust_radius: |@doc:minimizer.trust.max_max_trust_radius| Maximum value of the
                trust-region radius. No steps that are longer than this value will be
                proposed. |@docend:minimizer.trust.max_max_trust_radius|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        options = {}
        if init_trust_radius is not None:
            # SciPy's dogleg solver documents this option as ``initial_trust_radius``;
            # ``initial_tr_radius`` is the trust-constr spelling and is not a dogleg option.
            options["initial_trust_radius"] = init_trust_radius
        if eta is not None:
            options["eta"] = eta
        if max_trust_radius is not None:
            options["max_trust_radius"] = max_trust_radius

        # Only forward an ``options`` entry when at least one option was set.
        minimizer_options = {"options": options} if options else {}

        super().__init__(
            method="dogleg",
            internal_tol={"gtol": None},
            # Both derivatives are fixed to the ones provided by the loss.
            gradient="zfit",
            hessian="zfit",
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Dogleg only accepts the derivatives provided by the loss ("zfit").
ScipyDogleg._add_derivative_methods(
    gradient=["zfit"],
    hessian=["zfit"],
)
[docs]
class ScipyPowell(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Powell ",
    ) -> None:
        """Local minimizer using the modified Powell algorithm.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # This wrapper exposes no solver-specific options, so nothing extra is
        # forwarded to the base class.
        minimizer_options = {}

        def initializer(options, init, **_):
            # If an init is given, reuse the ``direc`` entry stored under
            # ``init.info["original"]`` (when present) as the solver's ``direc`` option.
            if init is not None:
                direc = init.info["original"].get("direc")
                if direc is not None:
                    options["direc"] = direc
            return options

        super().__init__(
            method="Powell",
            internal_tol={"xtol": None, "ftol": None},
            gradient=NOT_SUPPORTED,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            maxiter=maxiter,
            initializer=initializer,
            verbosity=verbosity,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
[docs]
class ScipySLSQP(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        gradient: Callable | str | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy SLSQP ",
    ) -> None:
        """Local, gradient-based minimizer using the Sequential Least Squares Programming algorithm.

        `Sequential Least Squares Programming <https://en.wikipedia.org/wiki/Sequential_quadratic_programming>`_
        is an iterative method for nonlinear parameter optimization.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            gradient: |@doc:minimizer.scipy.gradient| Define the method to use for the gradient
                computation that the minimizer should use. This can be the gradient provided by
                the loss itself or a method from the minimizer.
                In general, using the zfit provided automatic gradient is more precise and needs
                less computation time for the evaluation compared to a numerical method, but it
                may not always be possible. In this case, zfit switches to a generic, numerical
                gradient which in general performs worse than if the minimizer has its own
                numerical gradient.
                The following are possible choices:

                If set to ``False`` or ``'zfit'`` (or ``None``; default), the gradient of the
                loss (usually the automatic gradient) will be used; the minimizer won't use an
                internal algorithm. |@docend:minimizer.scipy.gradient|
                |@doc:minimizer.scipy.gradient.internal| ``True`` tells the minimizer to use its
                default internal gradient estimation. This can be specified more clearly using
                the arguments ``'2-point'`` and ``'3-point'``, which specify the numerical
                algorithm the minimizer should use in order to estimate the
                gradient. |@docend:minimizer.scipy.gradient.internal|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # This wrapper exposes no solver-specific options, so nothing extra is
        # forwarded to the base class.
        minimizer_options = {}

        super().__init__(
            method="SLSQP",
            internal_tol={"ftol": None},
            gradient=gradient,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            verbosity=verbosity,
            maxiter=maxiter,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
# Register the gradient modes that ScipySLSQP accepts. The complex-step
# scheme 'cs' is intentionally not offered (it was noted to work badly).
ScipySLSQP._add_derivative_methods(gradient=["2-point", "3-point", None, True, False, "zfit"])
[docs]
class ScipyCOBYLA(ScipyBaseMinimizer):
    @warn_experimental_feature
    def __init__(
        self,
        tol: float | None = None,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy COBYLA ",
    ) -> None:
        """UNSTABLE! Local gradient-free downhill simplex-like method with an implicit linear approximation.

        COBYLA constructs successive linear approximations of the objective function and
        constraints via a simplex of n+1 points (in n dimensions), and optimizes these
        approximations in a trust region at each step.

        |@doc:minimizer.scipy.info| This implementation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the convergence/stopping criterion
                of the algorithm in order to determine if the minimum has been found.
                Defaults to 1e-3. |@docend:minimizer.tol|
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between
                0 and 10. The verbosity has the meaning:

                - a value of 0 means quiet and no output
                - above 0 up to 5, information that is good to know but without flooding the
                  user, corresponding to an "INFO" level.
                - A value above 5 starts printing out considerably more and is used more for
                  debugging purposes.
                - Setting the verbosity to 10 will print out every evaluation of the loss
                  function and gradient.

                Some minimizers offer additional output which is also distributed as above but
                may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations. This corresponds
                to roughly the maximum number of evaluations of the ``value``, ``gradient`` or
                ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an estimated
                measure for the distance to the minimum and can include the relative or absolute
                changes of the parameters, function value, gradients and more. If the value of
                the criterion is smaller than ``loss.errordef * tol``, the algorithm stops and
                it is assumed that the minimum has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                input arguments in the init. Determines the behavior of the minimizer in certain
                situations, most notably when encountering NaNs. It can also implement a
                callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # An (empty) ``options`` entry is always forwarded for this minimizer.
        minimizer_options = {"options": {}}

        super().__init__(
            method="COBYLA",
            internal_tol={"tol": None},
            gradient=NOT_SUPPORTED,
            hessian=NOT_SUPPORTED,
            minimizer_options=minimizer_options,
            tol=tol,
            maxiter=maxiter,
            verbosity=verbosity,
            strategy=strategy,
            criterion=criterion,
            name=name,
        )
class ScipyNelderMead(ScipyBaseMinimizer):
    def __init__(
        self,
        tol: float | None = None,
        adaptive: bool | None = True,
        verbosity: int | None = None,
        maxiter: int | str | None = None,
        criterion: ConvergenceCriterion | None = None,
        strategy: ZfitStrategy | None = None,
        name: str = "SciPy Nelder-Mead ",
    ) -> None:
        """Local gradient-free downhill simplex method.

        `Nelder-Mead <https://en.wikipedia.org/wiki/Nelder%E2%80%93Mead_method>`_
        is a gradient-free method to minimize an objective function. Its performance is
        usually inferior to gradient based algorithms.

        |@doc:minimizer.scipy.info| This implenemtation wraps the minimizers in
        `SciPy optimize <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_. |@docend:minimizer.scipy.info|

        Args:
            tol: |@doc:minimizer.tol| Termination value for the
                   convergence/stopping criterion of the algorithm
                   in order to determine if the minimum has
                   been found. Defaults to 1e-3. |@docend:minimizer.tol|
            adaptive: Forwarded to SciPy as the ``adaptive`` option of the
                   Nelder-Mead method (adapt the algorithm parameters to the
                   dimensionality of the problem). If ``None``, the option is
                   not set and SciPy's own default applies.
            verbosity: |@doc:minimizer.verbosity| Verbosity of the minimizer. Has to be between 0 and 10.
              The verbosity has the meaning:

               - a value of 0 means quiet and no output
               - above 0 up to 5, information that is good to know but without
                 flooding the user, corresponding to a "INFO" level.
               - A value above 5 starts printing out considerably more and
                 is used more for debugging purposes.
               - Setting the verbosity to 10 will print out every
                 evaluation of the loss function and gradient.

               Some minimizers offer additional output which is also
               distributed as above but may duplicate certain printed values. |@docend:minimizer.verbosity|
            maxiter: |@doc:minimizer.maxiter| Approximate number of iterations.
                   This corresponds to roughly the maximum number of
                   evaluations of the ``value``, 'gradient`` or ``hessian``. |@docend:minimizer.maxiter|
            criterion: |@doc:minimizer.criterion| Criterion of the minimum. This is an
                   estimated measure for the distance to the
                   minimum and can include the relative
                   or absolute changes of the parameters,
                   function value, gradients and more.
                   If the value of the criterion is smaller
                   than ``loss.errordef * tol``, the algorithm
                   stopps and it is assumed that the minimum
                   has been found. |@docend:minimizer.criterion|
            strategy: |@doc:minimizer.strategy| A class of type ``ZfitStrategy`` that takes no
                   input arguments in the init. Determines the behavior of the minimizer in
                   certain situations, most notably when encountering
                   NaNs. It can also implement a callback function. |@docend:minimizer.strategy|
            name: |@doc:minimizer.name| Human-readable name of the minimizer. |@docend:minimizer.name|
        """
        # Only pass "adaptive" on when the caller actually chose a value.
        scipy_options = {} if adaptive is None else {"adaptive": adaptive}

        def initializer(options, init, **_):
            """Seed ``options`` with the simplex stored in a previous result, if any."""
            if init is None:
                return options
            # NOTE(review): SciPy documents ``final_simplex`` as a
            # ``(vertices, values)`` tuple, while ``initial_simplex`` expects only
            # the vertices array -- confirm what ``init.info["original"]`` holds.
            previous_simplex = init.info["original"].get("final_simplex")
            if previous_simplex is not None:
                options["initial_simplex"] = previous_simplex
            return options

        super().__init__(
            method="Nelder-Mead",
            tol=tol,
            # Names of the SciPy-side tolerance options; no values are preset here.
            internal_tol={"fatol": None, "xatol": None},
            # Nelder-Mead is gradient-free: neither derivative can be configured.
            gradient=NOT_SUPPORTED,
            hessian=NOT_SUPPORTED,
            minimizer_options={"options": scipy_options},
            initializer=initializer,
            maxiter=maxiter,
            criterion=criterion,
            strategy=strategy,
            verbosity=verbosity,
            name=name,
        )
def combine_optimize_results(results):
    """Collapse several optimize results into one with accumulated counters.

    The last element of ``results`` is taken as the combined result. For each
    of the counters ``nfev``, ``njev``, ``nhev`` and ``nit`` that it contains,
    the value is replaced by the sum over all results that have that counter.
    Results lacking a counter are skipped for that counter instead of raising
    a ``KeyError``.

    Args:
        results: Non-empty sequence of mapping-like results (supporting
            ``in`` and item access), ordered from first to last run.

    Returns:
        ``results[0]`` untouched if there is exactly one result, otherwise
        ``results[-1]``, updated in place with the summed counters.

    Raises:
        IndexError: If ``results`` is empty.
    """
    if len(results) == 1:
        return results[0]

    combined = results[-1]
    for counter in ("nfev", "njev", "nhev", "nit"):
        if counter not in combined:
            continue
        # The sum is fully evaluated before the assignment, so the old value of
        # the last result is counted exactly once.
        combined[counter] = sum(res[counter] for res in results if counter in res)
    return combined