Examples#
Minimizing Python functions#
zfit minimizers can also be used to minimize normal Python functions as shown here.
"""This example illustrates how to minimize an arbitrary Python function using the zfit minimizer.
This may has some overhead in the beginning and won't be instantly fast compared to other libraries if run once.
Copyright (c) 2021 zfit
"""
# Copyright (c) 2022 zfit
import numpy as np
import zfit
# set everything to numpy mode. This is only needed if we
# don't use
zfit.run.set_autograd_mode(False)
zfit.run.set_graph_mode(False)
# create our favorite minimizer
minimizer = zfit.minimize.IpyoptV1()
# minimizer = zfit.minimize.Minuit()
# minimizer = zfit.minimize.ScipyTrustConstrV1()
# minimizer = zfit.minimize.NLoptLBFGSV1()
def func(x):
x = np.asarray(x) # make sure it's an array
return np.sum((x - 0.1) ** 2 + x[1] ** 4)
# We can use the same in pure TF, then we can also use
# the analytic gradient
#
# import tensorflow as tf
#
# @tf.function
# def func(x):
# x = tf.convert_to_tensor(x) # make sure it's an array
# return tf.reduce_sum((x - 0.1) ** 2 + x[1] ** 4)
# we can also use a more complicated function instead
# from scipy.optimize import rosen as func
# we need to set the errordef, the definition of "1 sigma"
func.errordef = 0.5
# initial parameters
params = [1, -3, 2, 1.4, 11]
# or for a more fine-grained control
# params = {
# 'value': [1, -3, 2, 1.4, 11], # mandatory
# 'lower': [-2, -5, -5, -10, -15], # lower bound, can be omitted
# 'upper': [2, 4, 5, 10, 15], # upper bound, can be omitted
# 'step_size': [0.1] * 5, # initial step size, can be omitted
# }
# minimize
result = minimizer.minimize(func, params)
# estimate errors
result.hesse(name="hesse")
result.errors(name="errors")
print(result)
Composing PDFs#
# Copyright (c) 2023 zfit
import zfit
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1.0, -4, 6)
sigma = zfit.Parameter("sigma", 1.0, 0.1, 10)
lambd = zfit.Parameter("lambda", -1.0, -5.0, 0)
frac = zfit.Parameter("fraction", 0.5, 0.0, 1.0)
# pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
sum_pdf = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
free_param = zfit.Parameter("free", 5, 0, 15)
param_shift = zfit.ComposedParameter("comp", lambda x: x + 5, free_param)
Custom 3D Functor#
# Copyright (c) 2022 zfit
# Functors are PDfs and Functions that depend on other PDFs or Functions. They can be used to define
# in a custom way combinations of PDFs or wrapping a single PDF.
# An example would be to create the sum of two PDFs. Of course this is already implemented in zfit
# as the functor SumPDF([pdf1, pdf2], fracs=...). For advanced uses, you
# can define your own functor as demonstrated below.
import numpy as np
import zfit
class CombinePolynomials(zfit.pdf.BaseFunctor):
"""Example of a functor pdf that adds three polynomials.
DEMONSTRATION PURPOSE ONLY, DO **NOT** USE IN REAL CASE.
"""
def __init__(self, angle1, angle2, angle3, name="3dPolynomial"):
pdfs = [angle1, angle2, angle3]
super().__init__(pdfs=pdfs, name=name)
_N_OBS = 3
def _unnormalized_pdf(self, x):
cosangle1_data, cosangle2_data, angle3_data = x.unstack_x()
cosangle1, cosangle2, angle3 = self.pdfs
pdf = (
cosangle1.pdf(cosangle1_data)
+ cosangle2.pdf(cosangle2_data)
+ angle3.pdf(angle3_data)
)
return pdf
def create_angular():
# create parameters
# c00 = zfit.Parameter(...)
# ... and so on
cosangle1_space = zfit.Space("cos angle 1", limits=(-1, 1))
cosangle2_space = zfit.Space("cos angle 2", limits=(-1, 1))
angle3_space = zfit.Space("angle 3", limits=(-np.pi, np.pi))
# this part could also be moved inside the __init__, but then the init would need to take all
# the coefficients and the spaces as arguments
cosangle1_pdf = zfit.pdf.Chebyshev(obs=cosangle1_space, coeffs=[c00, c01, c02])
cosangle2_pdf = zfit.pdf.Chebyshev(obs=cosangle2_space, coeffs=[c10, c11, c12])
angle3_pdf = zfit.pdf.Chebyshev(obs=angle3_space, coeffs=[c20, c21, c22])
return CombinePolynomials(
angle1=cosangle1_pdf, angle2=cosangle2_pdf, angle3=angle3_pdf
)
Custom PDF simple#
# Copyright (c) 2022 zfit
import zfit
from zfit import z
import zfit.z.numpy as znp
class CustomPDF(zfit.pdf.ZPDF):
"""1-dimensional PDF implementing the exp(alpha * x) shape."""
_PARAMS = ["alpha"] # specify which parameters to take
def _unnormalized_pdf(self, x): # implement function
data = z.unstack_x(x)
alpha = self.params["alpha"]
return znp.exp(alpha * data)
obs = zfit.Space("obs1", limits=(-4, 4))
custom_pdf = CustomPDF(obs=obs, alpha=0.2)
integral = custom_pdf.integrate(limits=(-1, 2))
sample = custom_pdf.sample(n=1000)
prob = custom_pdf.pdf(sample)
Custom PDF advanced#
# Copyright (c) 2024 zfit
import tensorflow as tf
import zfit
from zfit import z
class CustomPDF2D(zfit.pdf.BasePDF):
"""My custom, 2 dimensional pdf where the axes are: Energy, Momentum."""
def __init__(
self,
param1,
param2,
param3,
obs,
name="CustomPDF",
):
# we can now do complicated stuff here if needed
# only thing: we have to specify explicitly here what is which parameter
params = {
"super_param": param1, # we can change/compose etc parameters
"param2": param2,
"param3": param3,
}
super().__init__(obs, params, name=name)
def _unnormalized_pdf(self, x):
energy, momentum = x.unstack_x()
param1 = self.params["super_param"]
param2 = self.params["param2"]
param3 = self.params["param3"]
# just a fantasy function
probs = param1 * tf.cos(energy**2) + tf.math.log(param2 * momentum**2) + param3
return probs
# add an analytic integral
# define the integral function
def integral_full(limits, norm_range, params, model):
(
lower,
upper,
) = limits.rect_limits # for a more detailed guide, see the space.py example
param1 = params["super_param"]
param2 = params["param2"]
param3 = params["param3"]
lower = z.convert_to_tensor(lower)
upper = z.convert_to_tensor(upper)
# calculate the integral here, dummy integral, wrong!
integral = param1 * param2 * param3 + z.reduce_sum([lower, upper])
return integral
# define the space over which it is defined. Here, we use the axes
lower_full = (-10, zfit.Space.ANY_LOWER)
upper_full = (10, zfit.Space.ANY_UPPER)
integral_full_limits = zfit.Space(axes=(0, 1), limits=(lower_full, upper_full))
CustomPDF2D.register_analytic_integral(func=integral_full, limits=integral_full_limits)
# define the partial integral function
def integral_axis1(x, limits, norm_range, params, model):
data_0 = x.unstack_x() # data from axis 0
param1 = params["super_param"]
param2 = params["param2"]
param3 = params["param3"]
lower, upper = limits.limit1d # for a more detailed guide, see the space.py example
lower = z.convert_to_tensor(lower) # the limits are now 1-D, for axis 1
upper = z.convert_to_tensor(upper)
# calculate the integral here, dummy integral
integral = data_0**2 * param1 * param2 * param3 + z.reduce_sum([lower, upper])
# notice that the returned shape will be in the same as data_0, e.g. the number of events given in x
return integral
# define the space over which it is defined. Here, we use the axes
lower_axis1 = ((zfit.Space.ANY_LOWER,),)
upper_axis1 = ((zfit.Space.ANY_UPPER,),)
integral_axis1_limits = zfit.Space(
axes=(1,), # axes one corresponds to the second obs, here obs2
limits=(lower_axis1, upper_axis1),
)
CustomPDF2D.register_analytic_integral(
func=integral_axis1, limits=integral_axis1_limits
)
if __name__ == "__main__":
import numpy as np
obs = zfit.Space("obs1", (-10, 10)) * zfit.Space("obs2", (-3, 5))
pdf = CustomPDF2D(1, 2, 3, obs=obs)
sample = pdf.sample(n=1000)
pdf.pdf([[2.0, 2.5], [5.4, 3.2]])
x_part = zfit.Data.from_numpy(array=np.array([2.1, 2.2, 3.2]), obs="obs1")
# integrate over obs2 with limits 1, 2 for the `x_part`. This will use the analytic integral above
pdf.partial_integrate(x=x_part, limits=zfit.Space("obs2", (1, 2)))
# we can explicitly call the analytic integral. Without registering it (e.g. comment the line with the `register`
# and run again), it will raise an error
pdf.partial_analytic_integrate(x=x_part, limits=zfit.Space("obs2", (1, 2)))
Custom 3D PDF#
# Copyright (c) 2022 zfit
import tensorflow as tf
import zfit
from zfit import z
# IMPORTANT! The communication of which axis corresponds to which data point happens here. So the user knows now that
# he should create this pdf with a space in the obs (x, y, z).
class CustomPDF(zfit.pdf.ZPDF):
"""3-dimensional PDF calculating doing something fancy, takes x, y, z as data."""
_PARAMS = ["alpha", "beta"] # specify which parameters to take
_N_OBS = 3
def _unnormalized_pdf(self, x): # implement function
x, y, z = x.unstack_x()
alpha = self.params["alpha"]
beta = self.params["beta"]
x_new = tf.math.cos(alpha) * x
y_new = tf.math.sinh(beta) * y
z_new = z + 4.2
return x_new**2 + y_new**2 + z_new**2
xobs = zfit.Space("xobs", (-4, 4))
yobs = zfit.Space("yobs", (-3, 3))
zobs = zfit.Space("z", (-2, 2))
obs = xobs * yobs * zobs
alpha = zfit.Parameter("alpha", 0.2) # floating
beta = zfit.Parameter("beta", 0.4, floating=False) # non-floating
custom_pdf = CustomPDF(obs=obs, alpha=alpha, beta=beta)
integral = custom_pdf.integrate(limits=obs) # = 1 since normalized
sample = custom_pdf.sample(n=1000) # DO NOT USE THIS FOR TOYS!
prob = custom_pdf.pdf(sample) # DO NOT USE THIS FOR TOYS!
integral_np, sample_np, prob_np = [integral.numpy(), sample.numpy(), prob.numpy()]
Multidimensional preprocess fit#
# Copyright (c) 2022 zfit
import numpy as np
import zfit
# create space
xobs = zfit.Space("xobs", (-4, 4))
yobs = zfit.Space("yobs", (-3, 5))
zobs = zfit.Space("z", (-2, 4))
obs = xobs * yobs * zobs
# parameters
mu1 = zfit.Parameter("mu1", 1.0, -4, 6)
mu23 = zfit.Parameter("mu_shared", 1.0, -4, 6)
sigma12 = zfit.Parameter("sigma_shared", 1.0, 0.1, 10)
sigma3 = zfit.Parameter("sigma3", 1.0, 0.1, 10)
# model building, pdf creation
gauss_x = zfit.pdf.Gauss(mu=mu1, sigma=sigma12, obs=xobs)
gauss_y = zfit.pdf.Gauss(mu=mu23, sigma=sigma12, obs=yobs)
gauss_z = zfit.pdf.Gauss(mu=mu23, sigma=sigma3, obs=zobs)
product_gauss = zfit.pdf.ProductPDF([gauss_x, gauss_y, gauss_z])
# OR create directly from your 3 dimensional pdf as
# model = MyPDF(obs=obs, param1=..., param2,...)
# data
normal_np = np.random.normal(loc=[2.0, 2.5, 2.5], scale=[3.0, 3, 1.5], size=(10000, 3))
data_raw = zfit.Data.from_numpy(
obs=obs, array=normal_np
) # or from anywhere else, e.g. root
df = data_raw.to_pandas()
# preprocessing here, rename things. Match column names with the observable names "xobs", "yobs", "z" (they have to be
# contained, more columns in the df is not a problem)
data = zfit.Data.from_pandas(df, obs=obs)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=product_gauss, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result.params)
# do the error calculations, here with minos
param_errors, _ = result.errors()
print(param_errors)
Plot signal background#
# Copyright (c) 2023 zfit
import mplhep
import numpy as np
import zfit
mplhep.style.use("LHCb2")
import matplotlib.pyplot as plt
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1.0, -4, 6)
sigma = zfit.Parameter("sigma", 1.0, 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
frac = zfit.Parameter("fraction", 0.3, 0, 1)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
model = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
# data
n_sample = 5000
exp_data = exponential.sample(n=n_sample * (1 - frac)).numpy()
gauss_data = gauss.sample(n=n_sample * frac).numpy()
data = model.create_sampler(n_sample, limits=obs)
data.resample()
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
frac.set_value(0.07)
# plot the data
data_np = data["x"].numpy()
n_bins = 50
plot_scaling = n_sample / n_bins * obs.area()
x = np.linspace(-10, 10, 1000)
lower, upper = obs.limit1d
edges = np.linspace(lower, upper, n_bins + 1)
def plot_pdf(title):
plt.figure()
plt.title(title)
y = model.pdf(x).numpy()
y_gauss = (gauss.pdf(x) * frac).numpy()
y_exp = (exponential.pdf(x) * (1 - frac)).numpy()
plt.plot(x, y * plot_scaling, label="Sum - Model")
plt.plot(x, y_gauss * plot_scaling, label="Gauss - Signal")
plt.plot(x, y_exp * plot_scaling, label="Exp - Background")
mplhep.histplot(
np.histogram(data_np, bins=edges),
yerr=True,
color="black",
histtype="errorbar",
)
plt.ylabel("Counts")
plt.xlabel("$B_{mass}$")
# plot the pdf BEFORE fitting
plot_pdf("Before fitting")
# create NLL
nll = zfit.loss.UnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
# do the error calculations, here with minos
param_errors, _ = result.errors()
plot_pdf(title="After fitting")
plt.legend()
# uncomment to display plots
# plt.show()
Simple fit#
# Copyright (c) 2022 zfit
import numpy as np
import zfit
# create space
obs = zfit.Space("x", limits=(-2, 3))
# parameters
mu = zfit.Parameter("mu", 1.2, -4, 6)
sigma = zfit.Parameter("sigma", 1.3, 0.5, 10)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
# data
normal_np = np.random.normal(loc=2.0, scale=3.0, size=10000)
data = zfit.Data.from_numpy(obs=obs, array=normal_np)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=gauss, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit(gradient=False)
result = minimizer.minimize(nll)
# do the error calculations with a hessian approximation
param_errors = result.hesse()
# or here with minos
param_errors_asymetric, new_result = result.errors()
print(result)
Signal-background-mass fit#
# Copyright (c) 2022 zfit
import zfit
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1.0, -4, 6)
sigma = zfit.Parameter("sigma", 1.0, 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
frac = zfit.Parameter("fraction", 0.3, 0, 1)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
model = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
# data
n_sample = 10000
data = model.create_sampler(n_sample, limits=obs)
data.resample()
# set the values to a start value for the fit
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
frac.set_value(0.07)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result)
# do the error calculations, here with minos
param_hesse = result.hesse()
param_errors, new_result = result.errors()
print(result.params)
Signal-background-mass fit extended#
# Copyright (c) 2023 zfit
import pprint
import zfit
import pickle
n_bins = 50
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1.0, -4, 6)
sigma = zfit.Parameter("sigma", 1.0, 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
n_bkg = zfit.Parameter("n_bkg", 20000)
n_sig = zfit.Parameter("n_sig", 1000)
gauss_extended = gauss.create_extended(n_sig)
exp_extended = exponential.create_extended(n_bkg)
model = zfit.pdf.SumPDF([gauss_extended, exp_extended])
# data
# n_sample = 10000
data = model.create_sampler(n=21200)
data.resample()
# set the values to a start value for the fit
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
# create NLL
nll = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result)
# do the error calculations, here with hesse, than with minos
param_hesse = result.hesse()
(
param_errors,
_,
) = result.errors() # this returns a new FitResult if a new minimum was found
print(result.valid) # check if the result is still valid
# EXPERIMENTAL: we can serialize the model to a human-readable format with HS3
# or we can simply pickle the result (first freezing it)
import zfit.serialization as zserial
# human readable representation
pprint.pprint(zfit.hs3.dumps(model))
result.freeze()
dumped = pickle.dumps(result)
loaded = pickle.loads(dumped)
zfit.param.set_values(model.get_params(), loaded)
Simultanous fit#
# Copyright (c) 2022 zfit
# create space
import numpy as np
import zfit
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu_shared = zfit.Parameter("mu_shared", 1.0, -4, 6) # mu is a shared parameter
sigma1 = zfit.Parameter("sigma_one", 1.0, 0.1, 10)
sigma2 = zfit.Parameter("sigma_two", 1.0, 0.1, 10)
# model building, pdf creation
gauss1 = zfit.pdf.Gauss(mu=mu_shared, sigma=sigma1, obs=obs)
gauss2 = zfit.pdf.Gauss(mu=mu_shared, sigma=sigma2, obs=obs)
# data
normal_np = np.random.normal(loc=2.0, scale=3.0, size=10000)
data1 = zfit.Data.from_numpy(obs=obs, array=normal_np)
# data
normal_np = np.random.normal(loc=2.0, scale=4.0, size=10000)
data2 = zfit.Data.from_numpy(obs=obs, array=normal_np)
# create simultaenous loss, two possibilities
nll_simultaneous = zfit.loss.UnbinnedNLL(model=[gauss1, gauss2], data=[data1, data2])
# OR, equivalently
nll1 = zfit.loss.UnbinnedNLL(model=gauss1, data=data1)
nll2 = zfit.loss.UnbinnedNLL(model=gauss2, data=data2)
nll_simultaneous2 = nll1 + nll2
minimizer = zfit.minimize.Minuit()
minimizer.minimize(nll_simultaneous2)
Spaces#
# Copyright (c) 2022 zfit
import zfit
# Addition of limit with the same observables
simple_limit1 = zfit.Space(obs="obs1", limits=(-5, 1))
simple_limit2 = zfit.Space(obs="obs1", limits=(3, 7.5))
added_limits = simple_limit1 + simple_limit2
# OR equivalently
added_limits = simple_limit1.add(simple_limit2)
# multiplication of limits with different observables
first_limit_lower = (-5, 6)
first_limit_upper = (5, 10)
second_limit_lower = (7, 12)
second_limit_upper = (9, 15)
space1 = zfit.Space(obs=["obs1", "obs2"], limits=(first_limit_lower, first_limit_upper))
space2 = zfit.Space(
obs=["obs3", "obs4"], limits=(second_limit_lower, second_limit_upper)
)
space4 = space1 * space2
assert space4.obs == ("obs1", "obs2", "obs3", "obs4")
assert space4.n_obs == 4
# retreiving the limits
# for a one dimensional space, we can use `limit1d` as a shortcut
lower, upper = simple_limit1.limit1d
# However, for a higher dimensional space, this won't work. We can use `space.limits`, which is equivalent to
# (space.lower, space.upper).
lower2d, upper2d = space1.limits
# these have shape (nevents, nobs), whereas nevents is usually just 1
assert lower2d.shape == (1, space1.n_obs)
# order to retrieve now the same limits as from the first space, we can use array notation [:, i]
lower1_from2d = lower2d[:, 0]
upper1_from2d = upper2d[:, 0]
assert lower1_from2d == first_limit_lower[0]
assert upper1_from2d == first_limit_upper[0]