Examples¶
Minimizing Python functions¶
zfit minimizers can also be used to minimize normal Python functions as shown here.
"""This example illustrates how to minimize an arbitrary Python function using the zfit minimizer.
This may has some overhead in the beginning and won't be instantly fast compared to other libraries if run once.
Copyright (c) 2021 zfit
"""
# Copyright (c) 2021 zfit
import numpy as np
import zfit
# set everything to numpy mode
zfit.run.set_autograd_mode(False)
zfit.run.set_graph_mode(False)
# create our favorite minimizer
minimizer = zfit.minimize.IpyoptV1()
# minimizer = zfit.minimize.Minuit()
# minimizer = zfit.minimize.ScipyTrustKrylovV1()
# minimizer = zfit.minimize.NLoptLBFGSV1()
def func(x):
x = np.array(x) # make sure it's an array
return np.sum((x - 0.1) ** 2 + x[1] ** 4)
# we can also use a more complicated function instead
# from scipy.optimize import rosen as func
# we need to set the errordef, the definition of "1 sigma"
func.errordef = 0.5
# initial parameters
params = [1, -3, 2, 1.4, 11]
# or for a more fine-grained control
# params = {
# 'value': [1, -3, 2, 1.4, 11], # mandatory
# 'lower': [-2, -5, -5, -10, -15], # lower bound, can be omitted
# 'upper': [2, 4, 5, 10, 15], # upper bound, can be omitted
# 'step_size': [0.1] * 5, # initial step size, can be omitted
# }
# minimize
result = minimizer.minimize(func, params)
# estimate errors
result.hesse()
result.errors()
print(result)
Composing PDFs¶
# Copyright (c) 2020 zfit
import zfit
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1., -4, 6)
sigma = zfit.Parameter("sigma", 1., 0.1, 10)
lambd = zfit.Parameter("lambda", -1., -5., 0)
frac = zfit.Parameter("fraction", 0.5, 0., 1.)
# pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
sum_pdf = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
Custom 3D Functor¶
# Copyright (c) 2021 zfit
# Functors are PDfs and Functions that depend on other PDFs or Functions. They can be used to define
# in a custom way combinations of PDFs or wrapping a single PDF.
# An example would be to create the sum of two PDFs. Of course this is already implemented in zfit
# as the functor SumPDF([pdf1, pdf2], fracs=...). For advanced uses, you
# can define your own functor as demonstrated below.
import numpy as np
import zfit
class CombinePolynomials(zfit.pdf.BaseFunctor):
"""Example of a functor pdf that adds three polynomials.
DEMONSTRATION PURPOSE ONLY, DO **NOT** USE IN REAL CASE.
"""
def __init__(self, angle1, angle2, angle3, name="3dPolynomial"):
pdfs = [angle1, angle2, angle3]
super().__init__(pdfs=pdfs, name=name)
_N_OBS = 3
def _unnormalized_pdf(self, x):
cosangle1_data, cosangle2_data, angle3_data = x.unstack_x()
cosangle1, cosangle2, angle3 = self.pdfs
pdf = cosangle1.pdf(cosangle1_data) + cosangle2.pdf(cosangle2_data) + angle3.pdf(angle3_data)
return pdf
def create_angular():
# create parameters
# c00 = zfit.Parameter(...)
# ... and so on
cosangle1_space = zfit.Space('cos angle 1', limits=(-1, 1))
cosangle2_space = zfit.Space('cos angle 2', limits=(-1, 1))
angle3_space = zfit.Space('angle 3', limits=(-np.pi, np.pi))
# this part could also be moved inside the __init__, but then the init would need to take all
# the coefficients and the spaces as arguments
cosangle1_pdf = zfit.pdf.Chebyshev(obs=cosangle1_space, coeffs=[c00, c01, c02])
cosangle2_pdf = zfit.pdf.Chebyshev(obs=cosangle2_space, coeffs=[c10, c11, c12])
angle3_pdf = zfit.pdf.Chebyshev(obs=angle3_space, coeffs=[c20, c21, c22])
return CombinePolynomials(angle1=cosangle1_pdf, angle2=cosangle2_pdf, angle3=angle3_pdf)
Custom PDF simple¶
# Copyright (c) 2020 zfit
import zfit
from zfit import z
class CustomPDF(zfit.pdf.ZPDF):
"""1-dimensional PDF implementing the exp(alpha * x) shape."""
_PARAMS = ['alpha'] # specify which parameters to take
def _unnormalized_pdf(self, x): # implement function
data = z.unstack_x(x)
alpha = self.params['alpha']
return z.exp(alpha * data)
obs = zfit.Space("obs1", limits=(-4, 4))
custom_pdf = CustomPDF(obs=obs, alpha=0.2)
integral = custom_pdf.integrate(limits=(-1, 2))
sample = custom_pdf.sample(n=1000)
prob = custom_pdf.pdf(sample)
Custom PDF advanced¶
# Copyright (c) 2021 zfit
import tensorflow as tf
import zfit
from zfit import z
class CustomPDF2D(zfit.pdf.BasePDF):
"""My custom, 2 dimensional pdf where the axes are: Energy, Momentum."""
def __init__(self, param1, param2, param3, obs, name="CustomPDF", ):
# we can now do complicated stuff here if needed
# only thing: we have to specify explicitly here what is which parameter
params = {'super_param': param1, # we can change/compose etc parameters
'param2': param2, 'param3': param3}
super().__init__(obs, params, name=name)
def _unnormalized_pdf(self, x):
energy, momentum = x.unstack_x()
param1 = self.params['super_param']
param2 = self.params['param2']
param3 = self.params['param3']
# just a fantasy function
probs = param1 * tf.cos(energy ** 2) + tf.math.log(param2 * momentum ** 2) + param3
return probs
# add an analytic integral
# define the integral function
def integral_full(limits, norm_range, params, model):
lower, upper = limits.rect_limits # for a more detailed guide, see the space.py example
param1 = params['super_param']
param2 = params['param2']
param3 = params['param3']
lower = z.convert_to_tensor(lower)
upper = z.convert_to_tensor(upper)
# calculate the integral here, dummy integral, wrong!
integral = param1 * param2 * param3 + z.reduce_sum([lower, upper])
return integral
# define the space over which it is defined. Here, we use the axes
lower_full = (-10, zfit.Space.ANY_LOWER)
upper_full = (10, zfit.Space.ANY_UPPER)
integral_full_limits = zfit.Space(axes=(0, 1),
limits=(lower_full, upper_full))
CustomPDF2D.register_analytic_integral(func=integral_full,
limits=integral_full_limits)
# define the partial integral function
def integral_axis1(x, limits, norm_range, params, model):
data_0 = x.unstack_x() # data from axis 0
param1 = params['super_param']
param2 = params['param2']
param3 = params['param3']
lower, upper = limits.limit1d # for a more detailed guide, see the space.py example
lower = z.convert_to_tensor(lower) # the limits are now 1-D, for axis 1
upper = z.convert_to_tensor(upper)
# calculate the integral here, dummy integral
integral = data_0 ** 2 * param1 * param2 * param3 + z.reduce_sum([lower, upper])
# notice that the returned shape will be in the same as data_0, e.g. the number of events given in x
return integral
# define the space over which it is defined. Here, we use the axes
lower_axis1 = ((zfit.Space.ANY_LOWER,),)
upper_axis1 = ((zfit.Space.ANY_UPPER,),)
integral_axis1_limits = zfit.Space(axes=(1,), # axes one corresponds to the second obs, here obs2
limits=(lower_axis1, upper_axis1))
CustomPDF2D.register_analytic_integral(func=integral_axis1,
limits=integral_axis1_limits)
if __name__ == '__main__':
import numpy as np
obs = zfit.Space('obs1', (-10, 10)) * zfit.Space('obs2', (-3, 5))
pdf = CustomPDF2D(1, 2, 3, obs=obs)
sample = pdf.sample(n=1000)
pdf.pdf([[2., 2.5], [5.4, 3.2]])
x_part = zfit.Data.from_numpy(array=np.array([2.1, 2.2, 3.2]), obs='obs1')
# integrate over obs2 with limits 1, 2 for the `x_part`. This will use the analytic integral above
pdf.partial_integrate(x=x_part, limits=zfit.Space('obs2', (1, 2)))
# we can explicitly call the analytic integral. Without registering it (e.g. comment the line with the `register`
# and run again), it will raise an error
pdf.partial_analytic_integrate(x=x_part, limits=zfit.Space('obs2', (1, 2)))
Custom 3D PDF¶
# Copyright (c) 2021 zfit
import tensorflow as tf
import zfit
from zfit import z
# IMPORTANT! The communication of which axis corresponds to which data point happens here. So the user knows now that
# he should create this pdf with a space in the obs (x, y, z).
class CustomPDF(zfit.pdf.ZPDF):
"""3-dimensional PDF calculating doing something fancy, takes x, y, z as data."""
_PARAMS = ['alpha', 'beta'] # specify which parameters to take
_N_OBS = 3
def _unnormalized_pdf(self, x): # implement function
x, y, z = x.unstack_x()
alpha = self.params['alpha']
beta = self.params['beta']
x_new = tf.math.cos(alpha) * x
y_new = tf.math.sinh(beta) * y
z_new = z + 4.2
return x_new ** 2 + y_new ** 2 + z_new ** 2
xobs = zfit.Space('xobs', (-4, 4))
yobs = zfit.Space('yobs', (-3, 3))
zobs = zfit.Space('z', (-2, 2))
obs = xobs * yobs * zobs
alpha = zfit.Parameter("alpha", 0.2) # floating
beta = zfit.Parameter("beta", 0.4, floating=False) # non-floating
custom_pdf = CustomPDF(obs=obs, alpha=alpha, beta=beta)
integral = custom_pdf.integrate(limits=obs) # = 1 since normalized
sample = custom_pdf.sample(n=1000) # DO NOT USE THIS FOR TOYS!
prob = custom_pdf.pdf(sample) # DO NOT USE THIS FOR TOYS!
integral_np, sample_np, prob_np = [integral.numpy(), sample.numpy(), prob.numpy()]
Multidimensional preprocess fit¶
# Copyright (c) 2020 zfit
import numpy as np
import zfit
# create space
xobs = zfit.Space('xobs', (-4, 4))
yobs = zfit.Space('yobs', (-3, 5))
zobs = zfit.Space('z', (-2, 4))
obs = xobs * yobs * zobs
# parameters
mu1 = zfit.Parameter("mu1", 1., -4, 6)
mu23 = zfit.Parameter("mu_shared", 1., -4, 6)
sigma12 = zfit.Parameter("sigma_shared", 1., 0.1, 10)
sigma3 = zfit.Parameter("sigma3", 1., 0.1, 10)
# model building, pdf creation
gauss_x = zfit.pdf.Gauss(mu=mu1, sigma=sigma12, obs=xobs)
gauss_y = zfit.pdf.Gauss(mu=mu23, sigma=sigma12, obs=yobs)
gauss_z = zfit.pdf.Gauss(mu=mu23, sigma=sigma3, obs=zobs)
product_gauss = zfit.pdf.ProductPDF([gauss_x, gauss_y, gauss_z])
# OR create directly from your 3 dimensional pdf as
# model = MyPDF(obs=obs, param1=..., param2,...)
# data
normal_np = np.random.normal(loc=[2., 2.5, 2.5], scale=[3., 3, 1.5], size=(10000, 3))
data_raw = zfit.Data.from_numpy(obs=obs, array=normal_np) # or from anywhere else, e.g. root
df = data_raw.to_pandas()
# preprocessing here, rename things. Match column names with the observable names "xobs", "yobs", "z" (they have to be
# contained, more columns in the df is not a problem)
data = zfit.Data.from_pandas(df, obs=obs)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=product_gauss, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result.params)
# do the error calculations, here with minos
param_errors, _ = result.errors()
print(param_errors)
Plot signal background¶
# Copyright (c) 2020 zfit
import mplhep
import numpy as np
import zfit
mplhep.set_style('LHCb2')
import matplotlib.pyplot as plt
plt.rcParams["font.serif"] = "cmr10"
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1., -4, 6)
sigma = zfit.Parameter("sigma", 1., 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
frac = zfit.Parameter("fraction", 0.3, 0, 1)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
model = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
# data
n_sample = 10000
exp_data = exponential.sample(n=n_sample * (1 - frac)).numpy()
gauss_data = gauss.sample(n=n_sample * frac).numpy()
data = model.create_sampler(n_sample, limits=obs)
data.resample()
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
frac.set_value(0.07)
# plot the data
data_np = data[:, 0].numpy()
color = 'black'
n_bins = 50
linewidth = 2.5
plot_scaling = n_sample / n_bins * obs.area()
x = np.linspace(-10, 10, 1000)
# plot the pdf BEFORE fitting
plt.figure()
plt.title("Before fitting")
# plot the data
plt.hist(data_np, color=color, bins=n_bins, histtype="stepfilled", alpha=0.1)
plt.hist(data_np, color=color, bins=n_bins, histtype="step")
# plot the pdfs
y = model.pdf(x).numpy()
y_gauss = (gauss.pdf(x) * frac).numpy() # notice the frac!
y_exp = (exponential.pdf(x) * (1 - frac)).numpy() # notice the frac!
plt.plot(x, y * plot_scaling, label="Sum - Model", linewidth=linewidth * 2)
plt.plot(x, y_gauss * plot_scaling, '--', label="Gauss - Signal", linewidth=linewidth)
plt.plot(x, y_exp * plot_scaling, '--', label="Exponential - Background", linewidth=linewidth)
plt.xlabel("Physical observable")
plt.legend()
# create NLL
nll = zfit.loss.UnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
# do the error calculations, here with minos
param_errors, _ = result.errors()
plt.figure()
plt.title("After fitting")
# plot the data
# plt.hist(data_np, color=color, bins=n_bins, histtype="stepfilled", alpha=0.1)
# plt.hist(data_np, color=color, bins=n_bins, histtype="step")
y = model.pdf(x).numpy() # rerun now after the fitting
y_gauss = (gauss.pdf(x) * frac).numpy()
y_exp = (exponential.pdf(x) * (1 - frac)).numpy()
plt.plot(x, y * plot_scaling, label="Sum - Model")
plt.plot(x, y_gauss * plot_scaling, label="Gauss - Signal")
plt.plot(x, y_exp * plot_scaling, label="Exp - Background")
mplhep.histplot(np.histogram(data_np, bins=n_bins), yerr=True, color='black', histtype='errorbar',
markersize=17, capsize=2.5,
markeredgewidth=1.5, zorder=1,
elinewidth=1.5,
)
plt.ylabel("Counts")
plt.xlabel("Physical observable")
plt.legend()
plt.show()
Simple fit¶
# Copyright (c) 2021 zfit
import numpy as np
import zfit
# create space
obs = zfit.Space("x", limits=(-2, 3))
# parameters
mu = zfit.Parameter("mu", 1.2, -4, 6)
sigma = zfit.Parameter("sigma", 1.3, 0.5, 10)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
# data
normal_np = np.random.normal(loc=2., scale=3., size=10000)
data = zfit.Data.from_numpy(obs=obs, array=normal_np)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=gauss, data=data)
# # create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
# do the error calculations with a hessian approximation
param_errors = result.hesse()
# or here with minos
param_errors_asymetric = result.errors()
Signal-background-mass fit¶
# Copyright (c) 2020 zfit
import zfit
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1., -4, 6)
sigma = zfit.Parameter("sigma", 1., 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
frac = zfit.Parameter("fraction", 0.3, 0, 1)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
model = zfit.pdf.SumPDF([gauss, exponential], fracs=frac)
# data
n_sample = 10000
data = model.create_sampler(n_sample, limits=obs)
data.resample()
# set the values to a start value for the fit
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
frac.set_value(0.07)
# create NLL
nll = zfit.loss.UnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(nll)
print(result)
# do the error calculations, here with minos
param_hesse = result.hesse()
param_errors, new_result = result.errors()
print(result.params)
Signal-background-mass fit extended¶
# Copyright (c) 2021 zfit
import matplotlib.pyplot as plt
import numpy as np
import zfit
n_bins = 50
# create space
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu = zfit.Parameter("mu", 1., -4, 6)
sigma = zfit.Parameter("sigma", 1., 0.1, 10)
lambd = zfit.Parameter("lambda", -0.06, -1, -0.01)
# model building, pdf creation
gauss = zfit.pdf.Gauss(mu=mu, sigma=sigma, obs=obs)
exponential = zfit.pdf.Exponential(lambd, obs=obs)
n_bkg = zfit.Parameter('n_bkg', 20000)
n_sig = zfit.Parameter('n_sig', 1000)
gauss_extended = gauss.create_extended(n_sig)
exp_extended = exponential.create_extended(n_bkg)
model = zfit.pdf.SumPDF([gauss_extended, exp_extended])
# data
# n_sample = 10000
data = model.create_sampler(n=21200)
data.resample()
# set the values to a start value for the fit
mu.set_value(0.5)
sigma.set_value(1.2)
lambd.set_value(-0.05)
# create NLL
nll = zfit.loss.ExtendedUnbinnedNLL(model=model, data=data)
# create a minimizer
minimizer = zfit.minimize.Minuit(verbosity=7)
result = minimizer.minimize(nll)
print(result.params)
# do the error calculations, here with hesse, than with minos
param_hesse = result.hesse()
param_errors, _ = result.errors() # this returns a new FitResult if a new minimum was found
print(result.valid) # check if the result is still valid
Simultanous fit¶
# Copyright (c) 2021 zfit
# create space
import numpy as np
import zfit
obs = zfit.Space("x", limits=(-10, 10))
# parameters
mu_shared = zfit.Parameter("mu_shared", 1., -4, 6) # mu is a shared parameter
sigma1 = zfit.Parameter("sigma_one", 1., 0.1, 10)
sigma2 = zfit.Parameter("sigma_two", 1., 0.1, 10)
# model building, pdf creation
gauss1 = zfit.pdf.Gauss(mu=mu_shared, sigma=sigma1, obs=obs)
gauss2 = zfit.pdf.Gauss(mu=mu_shared, sigma=sigma2, obs=obs)
# data
normal_np = np.random.normal(loc=2., scale=3., size=10000)
data1 = zfit.Data.from_numpy(obs=obs, array=normal_np)
# data
normal_np = np.random.normal(loc=2., scale=4., size=10000)
data2 = zfit.Data.from_numpy(obs=obs, array=normal_np)
# create simultaenous loss, two possibilities
nll_simultaneous = zfit.loss.UnbinnedNLL(model=[gauss1, gauss2],
data=[data1, data2])
# OR, equivalently
nll1 = zfit.loss.UnbinnedNLL(model=gauss1, data=data1)
nll2 = zfit.loss.UnbinnedNLL(model=gauss2, data=data2)
nll_simultaneous2 = nll1 + nll2
minimizer = zfit.minimize.Minuit()
minimizer.minimize(nll_simultaneous2)
Spaces¶
# Copyright (c) 2021 zfit
import zfit
# Addition of limit with the same observables
simple_limit1 = zfit.Space(obs='obs1', limits=(-5, 1))
simple_limit2 = zfit.Space(obs='obs1', limits=(3, 7.5))
added_limits = simple_limit1 + simple_limit2
# OR equivalently
added_limits = simple_limit1.add(simple_limit2)
# multiplication of limits with different observables
first_limit_lower = (-5, 6)
first_limit_upper = (5, 10)
second_limit_lower = (7, 12)
second_limit_upper = (9, 15)
space1 = zfit.Space(obs=['obs1', 'obs2'], limits=(first_limit_lower, first_limit_upper))
space2 = zfit.Space(obs=['obs3', 'obs4'], limits=(second_limit_lower, second_limit_upper))
space4 = space1 * space2
assert space4.obs == ('obs1', 'obs2', 'obs3', 'obs4')
assert space4.n_obs == 4
# retreiving the limits
# for a one dimensional space, we can use `limit1d` as a shortcut
lower, upper = simple_limit1.limit1d
# However, for a higher dimensional space, this won't work. We can use `space.limits`, which is equivalent to
# (space.lower, space.upper).
lower2d, upper2d = space1.limits
# these have shape (nevents, nobs), whereas nevents is usually just 1
assert lower2d.shape == (1, space1.n_obs)
# order to retrieve now the same limits as from the first space, we can use array notation [:, i]
lower1_from2d = lower2d[:, 0]
upper1_from2d = upper2d[:, 0]
assert lower1_from2d == first_limit_lower[0]
assert upper1_from2d == first_limit_upper[0]