Source code for zfit.minimizers.minimizer_tfp

#  Copyright (c) 2020 zfit
from collections import OrderedDict
from typing import Mapping

import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from .baseminimizer import BaseMinimizer, print_gradients, ZfitStrategy
from .fitresult import FitResult


class BFGS(BaseMinimizer):

    def __init__(self, strategy: ZfitStrategy = None, tolerance: float = 1e-5, verbosity: int = 5,
                 name: str = "BFGS_TFP", options: Mapping = None) -> None:
        """Minimizer wrapping the BFGS implementation of TensorFlow Probability.

        Args:
            strategy (ZfitStrategy): Strategy that handles NaN and more (to come, experimental)
            tolerance (float): Difference between function values below which the minimization
                is considered converged
            verbosity: Verbosity of the output; the higher, the more is printed. Typically
                between 1 and 10
            name: Name of the minimizer
            options: A `dict` of options passed to `tfp.optimizer.bfgs_minimize`, overriding
                the defaults
        """
        self.options = {} if options is None else options
        super().__init__(strategy=strategy, tolerance=tolerance, verbosity=verbosity, name=name,
                         minimizer_options={})

    def _minimize(self, loss, params):
        minimizer_fn = tfp.optimizer.bfgs_minimize
        params = tuple(params)
        do_print = self.verbosity > 5

        @tf.function(autograph=False, experimental_relax_shapes=True)
        def update_params_value_grad(loss, params, values):
            # Assign the proposed values to the parameters, then evaluate the
            # loss and its gradients at the new point.
            for param, value in zip(params, tf.unstack(values, axis=0)):
                param.set_value(value)
            value, gradients = loss.value_gradients(params=params)
            return gradients, value

        def to_minimize_func(values):
            # Objective in the form TFP expects: position in, (value, gradients) out.
            gradients, value = update_params_value_grad(loss, params, values)
            if do_print:
                print_gradients(params, values.numpy(), [float(g.numpy()) for g in gradients])

            loss_evaluated = value.numpy()
            if np.isnan(loss_evaluated):
                # Delegate NaN handling to the strategy.
                self.strategy.minimize_nan(loss=loss, minimizer=self, loss_value=loss_evaluated,
                                           params=params)
            gradients = tf.stack(gradients)
            return value, gradients

        # Use the parameter step sizes as a rough initial guess for the scale
        # of the inverse Hessian.
        initial_inv_hessian_est = tf.linalg.tensor_diag([p.step_size for p in params])

        minimizer_kwargs = dict(
            initial_position=tf.stack(params),
            # tolerance=1e-4,
            f_relative_tolerance=self.tolerance * 1e-2,  # TODO: use edm for stopping criteria
            initial_inverse_hessian_estimate=initial_inv_hessian_est,
            parallel_iterations=1,
            max_iterations=300,
        )
        minimizer_kwargs.update(self.options)  # user-given options override the defaults
        result = minimizer_fn(to_minimize_func, **minimizer_kwargs)

        # save the result
        params_result = result.position.numpy()
        self._update_params(params, values=params_result)

        info = {'n_eval': result.num_objective_evaluations.numpy(),
                'n_iter': result.num_iterations.numpy(),
                'grad': result.objective_gradient.numpy(),
                'original': result}
        edm = -999  # not provided by TFP
        fmin = result.objective_value.numpy()
        status = -999  # not provided by TFP
        converged = result.converged.numpy()
        params = OrderedDict((p, val) for p, val in zip(params, params_result))
        result = FitResult(params=params, edm=edm, fmin=fmin, info=info, loss=loss, status=status,
                           converged=converged, minimizer=self.copy())
        return result
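
# The helper below is an illustrative sketch, not part of the original module:
# it shows the bare `tfp.optimizer.bfgs_minimize` protocol that `_minimize`
# adapts the zfit loss to, namely an objective that takes a position tensor and
# returns a (value, gradient) pair. The quadratic and its analytic gradient are
# assumptions chosen for brevity.
def _demo_bfgs_on_quadratic():
    minimum = tf.constant([1.0, -2.0])

    def value_and_gradient(x):
        # f(x) = sum((x - minimum)^2), with gradient 2 * (x - minimum)
        return tf.reduce_sum((x - minimum) ** 2), 2.0 * (x - minimum)

    results = tfp.optimizer.bfgs_minimize(value_and_gradient,
                                          initial_position=tf.zeros(2))
    # results.position is close to [1.0, -2.0] once results.converged is True
    return results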
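
# A minimal usage sketch, not part of the original module, assuming the usual
# zfit workflow of fitting a Gaussian to toy data with an unbinned NLL. All
# parameter names, values and options below are illustrative only.
if __name__ == "__main__":
    import zfit

    obs = zfit.Space("x", limits=(-5, 5))
    # step_size feeds the initial inverse-Hessian estimate in `_minimize`
    mu = zfit.Parameter("mu", 1.0, step_size=0.1)
    sigma = zfit.Parameter("sigma", 1.0, step_size=0.1)
    gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)

    data = zfit.Data.from_numpy(obs=obs, array=np.random.normal(1.2, 0.9, size=1000))
    nll = zfit.loss.UnbinnedNLL(model=gauss, data=data)

    # `options` is forwarded to `tfp.optimizer.bfgs_minimize`, overriding the defaults
    minimizer = BFGS(tolerance=1e-5, options={"max_iterations": 500})
    result = minimizer.minimize(nll)
    print(result.params)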