"""A rich selection of analytically implemented Distributions (models) are available in `TensorFlow Probability.
<https://github.com/tensorflow/probability>`_. While their API is slightly different from the zfit models, it is similar
enough to be easily wrapped.
Therefore, a convenient wrapper as well as a lot of implementations are provided.
"""
# Copyright (c) 2024 zfit
from __future__ import annotations
from typing import Literal
import tensorflow as tf
import tensorflow_probability as tfp
import tensorflow_probability.python.distributions as tfd
from pydantic import Field
import zfit.z.numpy as znp
from zfit import z
from zfit.util.exception import AnalyticSamplingNotImplemented
from ..core.basepdf import BasePDF
from ..core.interfaces import ZfitData
from ..core.parameter import convert_to_parameter
from ..core.serialmixin import SerializableMixin
from ..core.space import Space, supports
from ..serialization import Serializer, SpaceRepr
from ..serialization.pdfrepr import BasePDFRepr
from ..settings import ztypes
from ..util import ztyping
from ..util.deprecation import deprecated_args
from ..util.ztyping import ExtendedInputType, NormInputType
# TODO: improve? while loop over `.sample`? Maybe as a fallback if not implemented?
def tfd_analytic_sample(n: int, dist: tfd.Distribution, limits: ztyping.ObsTypeInput):
    """Draw samples from a `tfd.Distribution` restricted to `limits` via its quantile function.

    Uniform numbers are drawn between the cdf values at the lower and the upper limit and
    are mapped back through ``dist.quantile``. No preprocessing of the inputs is done.

    Args:
        n: Number of samples to get
        dist: Distribution to sample from; needs ``cdf`` and ``quantile``
        limits: Limits to sample from within

    Returns:
        The sampled data with the number of samples and the number of observables.

    Raises:
        AnalyticSamplingNotImplemented: If ``dist.quantile`` raises ``NotImplementedError``.
    """
    # `v0.limits` is unpacked into exactly one (lower, upper) pair: not working with MultiSpace
    low, high = limits.v0.limits
    cdf_low = dist.cdf(low)
    cdf_high = dist.cdf(high)

    uniform_probs = z.random.uniform(shape=(n, 1), minval=cdf_low, maxval=cdf_high)
    uniform_probs.set_shape((None, 1))
    try:
        drawn = dist.quantile(uniform_probs)
    except NotImplementedError:
        # Translate into the dedicated exception type and hide the original traceback.
        raise AnalyticSamplingNotImplemented from None
    drawn.set_shape((None, limits.n_obs))
    return drawn
[docs]
class WrapDistribution(BasePDF):  # TODO: extend functionality of wrapper, like icdf
    """Base class that wraps a tensorflow-probability distribution as a PDF.

    The wrapped distribution is not stored as an instance: it is built anew from
    ``dist_params`` and ``dist_kwargs`` each time :py:attr:`distribution` is accessed.
    """

    def __init__(
        self,
        distribution,
        dist_params,
        obs,
        params=None,
        dist_kwargs=None,
        dtype=ztypes.float,
        name=None,
        **kwargs,
    ):
        """Store the distribution factory and its arguments and initialize the base PDF.

        Args:
            distribution: Callable (e.g. a tfp distribution class) that is invoked with the
                resolved ``dist_params``, ``dist_kwargs`` and a ``name`` keyword argument.
            dist_params: Keyword arguments for ``distribution`` or a callable returning them.
                ``None`` is replaced by an empty dict.
            obs: Observables, forwarded to the base class.
            params: Mapping of names to parameter-like objects; every value is passed through
                ``convert_to_parameter``. If ``None``, a copy of ``dist_params`` is used instead.
            dist_kwargs: Additional keyword arguments for ``distribution`` or a callable
                returning them. ``None`` is replaced by an empty dict.
            dtype: Forwarded to the base class.
            name: Name of the PDF; if not given, ``distribution.name`` is used.
            **kwargs: Forwarded to the base class.
        """
        # Check if subclass of distribution?
        dist_kwargs = {} if dist_kwargs is None else dist_kwargs
        dist_params = {} if dist_params is None else dist_params

        if not name:
            name = distribution.name
        if params is None:
            params = dist_params.copy()
        else:
            params = {key: convert_to_parameter(par) for key, par in params.items()}

        super().__init__(obs=obs, dtype=dtype, name=name, params=params, **kwargs)
        self._distribution = distribution
        self.dist_params = dist_params
        self.dist_kwargs = dist_kwargs
        self._inverse_analytic_integral = []

    @property
    def distribution(self):
        """Create the wrapped distribution from the current ``dist_params`` and ``dist_kwargs``."""
        params = self.dist_params
        resolved_params = params() if callable(params) else params
        kwargs = self.dist_kwargs
        resolved_kwargs = kwargs() if callable(kwargs) else kwargs
        return self._distribution(**resolved_params, **resolved_kwargs, name=self.name + "_tfp")

    def _unnormalized_pdf(self, x: ZfitData):
        """Return ``prob`` of the wrapped distribution evaluated at the unstacked `x`."""
        # TODO: use this? change shaping below?
        return self.distribution.prob(value=z.unstack_x(x), name="unnormalized_pdf")

    # TODO: register integral?
    @supports()
    def _analytic_integrate(self, limits, norm):
        """Integrate over `limits` as the difference of the cdf at the upper and lower limit."""
        del norm  # not supported
        lower_raw, upper_raw = limits._rect_limits_tf
        lower = z.unstack_x(lower_raw)
        upper = z.unstack_x(upper_raw)
        tf.debugging.assert_all_finite((lower, upper), "Are infinite limits needed? Causes troubles with NaNs")
        cdf_upper = self.distribution.cdf(upper)
        cdf_lower = self.distribution.cdf(lower)
        return cdf_upper - cdf_lower

    def _analytic_sample(self, n, limits: Space):
        """Sample `n` events within `limits` using :py:func:`tfd_analytic_sample`."""
        return tfd_analytic_sample(n=n, dist=self.distribution, limits=limits)
# class KernelDensityTFP(WrapDistribution):
#
# def __init__(self, loc: ztyping.ParamTypeInput, scale: ztyping.ParamTypeInput, obs: ztyping.ObsTypeInput,
# kernel: tfp.distributions.Distribution = tfp.distributions.Normal,
# weights: Union[None, np.ndarray, tf.Tensor] = None, name: str = "KernelDensity"):
# """Kernel Density Estimation of loc and either a broadcasted or a per-loc scale with a Distribution as kernel.
#
# Args:
# loc: 1-D Tensor-like. The positions of the `kernel`. Determines how many kernels will be created.
# scale: Broadcastable to the batch and event shape of the distribution. A scalar will simply broadcast
# to `loc` for a 1-D distribution.
# obs: Observables
# kernel: Distribution that is used as kernel
# weights: Weights of each `loc`, can be None or Tensor-like with shape compatible with loc
# name: Name of the PDF
# """
# if not isinstance(kernel,
# tfp.distributions.Distribution) and False: # HACK remove False, why does test not work?
# raise TypeError("Currently, only tfp distributions are supported as kernels. Please open an issue if this "
# "is too restrictive.")
#
# if isinstance(loc, ZfitData):
# if loc.weights is not None:
# if weights is not None:
# raise OverdefinedError("Cannot specify weights and use a `ZfitData` with weights.")
# else:
# weights = loc.weights
#
# if weights is None:
# weights = tf.ones_like(loc, dtype=tf.float64)
# self._weights_loc = weights
# self._weights_sum = z.reduce_sum(weights)
# self._latent_loc = loc
# params = {"scale": scale}
# dist_params = {"loc": loc, "scale": scale}
# super().__init__(distribution=kernel, dist_params=dist_params, obs=obs, params=params, dtype=ztypes.float,
# name=name)
#
# def _unnormalized_pdf(self, x: "zfit.Data", norm_range=False):
# value = znp.expand_dims(x.value(), -2)
# new_shape = znp.concatenate([tf.shape(value)[:2], [tf.shape(self._latent_loc)[0], 4]], axis=0)
# value = tf.broadcast_to(value, new_shape)
# probs = self.distribution.prob(value=value, name="unnormalized_pdf")
# # weights = znp.expand_dims(self._weights_loc, axis=-1)
# weights = self._weights_loc
# probs = z.reduce_sum(probs * weights, axis=-1) / self._weights_sum
# return probs
#
# @supports()
# def _analytic_integrate(self, limits, norm_range):
# lower, upper = limits.limits
# if np.all(-np.array(lower) == np.array(upper)) and np.all(np.array(upper) == np.infty):
# return z.reduce_sum(self._weights_loc) # tfp distributions are normalized to 1
# lower = z.to_real(lower[0], dtype=self.dtype)
# # lower = tf.broadcast_to(lower, shape=(tf.shape(self._latent_loc)[0], limits.n_obs,)) # remove
# upper = z.to_real(upper[0], dtype=self.dtype)
# integral = self.distribution.cdf(upper) - self.distribution.cdf(lower)
# integral = z.reduce_sum(integral * self._weights_loc, axis=-1) / self._weights_sum
# return integral # TODO: generalize for VectorSpaces
[docs]
class Gauss(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        mu: ztyping.ParamTypeInput,
        sigma: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "Gauss",
        label=None,
    ):
        """Gaussian or Normal distribution with a mean (mu) and a standard deviation (sigma).

        The gaussian shape is defined as

        .. math::

            f(x \\mid \\mu, \\sigma^2) = e^{ -\\frac{(x - \\mu)^{2}}{2\\sigma^2} }

        with the normalization over [-inf, inf] of

        .. math::

            \\frac{1}{\\sqrt{2\\pi\\sigma^2} }

        The normalization changes for different normalization ranges

        Args:
            mu: Mean of the gaussian dist
            sigma: Standard deviation or spread of the gaussian
            obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.
               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.
               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        mu, sigma = self._check_input_params_tfp(mu, sigma)

        def _normal_kwargs():
            # Evaluated lazily: the parameter values are read whenever the kwargs are requested.
            return {"loc": mu.value(), "scale": sigma.value()}

        super().__init__(
            distribution=tfp.distributions.Normal,
            dist_params=_normal_kwargs,
            obs=obs,
            params={"mu": mu, "sigma": sigma},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class GaussPDFRepr(BasePDFRepr):
    # Serialization representation of `Gauss`: a type tag, the space and one field per parameter.
    _implementation = Gauss
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["Gauss"] = Field(default="Gauss", alias="type")
    x: SpaceRepr
    # NOTE(review): the other *PDFRepr classes in this file annotate their parameters with
    # `ParamTypeDiscriminated`; confirm that the `Input` variant is intended here.
    mu: Serializer.types.ParamInputTypeDiscriminated
    sigma: Serializer.types.ParamInputTypeDiscriminated
class ExponentialTFP(WrapDistribution):
    _N_OBS = 1

    def __init__(
        self,
        tau: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        name: str = "Exponential",
    ):
        """Exponential distribution wrapping ``tfp.distributions.Exponential``.

        Args:
            tau: Parameter that is handed to the tfp distribution as its ``rate``.
            obs: Observables the pdf is defined in
            name: Name of the PDF
        """
        (tau,) = self._check_input_params_tfp(tau)
        # Must be a mapping name -> parameter: `WrapDistribution.__init__` iterates over
        # `params.items()`, which a set literal (`{"tau", tau}`) does not provide.
        params = {"tau": tau}

        def dist_params():
            # Read the value lazily, as the other wrappers in this module do.
            return {"rate": tau.value()}

        distribution = tfp.distributions.Exponential
        super().__init__(
            distribution=distribution,
            dist_params=dist_params,
            obs=obs,
            params=params,
            name=name,
        )
[docs]
class TruncatedGauss(WrapDistribution):
    _N_OBS = 1

    def __init__(
        self,
        mu: ztyping.ParamTypeInput,
        sigma: ztyping.ParamTypeInput,
        low: ztyping.ParamTypeInput,
        high: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "TruncatedGauss",
        label: str | None = None,
    ):
        """Gaussian distribution that is 0 outside of `low`, `high`. Equivalent to the product of Gauss and Uniform.

        Implemented by wrapping ``tfp.distributions.TruncatedNormal``.

        Args:
            mu: Mean of the gaussian dist
            sigma: Standard deviation or spread of the gaussian
            low: Below this value, the pdf is zero.
            high: Above this value, the pdf is zero.
            obs: Observables and normalization range the pdf is defined in
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        mu, sigma, low, high = self._check_input_params_tfp(mu, sigma, low, high)

        def _truncated_normal_kwargs():
            # Evaluated lazily: the parameter values are read whenever the kwargs are requested.
            return {
                "loc": mu.value(),
                "scale": sigma.value(),
                "low": low.value(),
                "high": high.value(),
            }

        super().__init__(
            distribution=tfp.distributions.TruncatedNormal,
            dist_params=_truncated_normal_kwargs,
            obs=obs,
            params={"mu": mu, "sigma": sigma, "low": low, "high": high},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
[docs]
class Cauchy(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        m: ztyping.ParamTypeInput,
        gamma: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "Cauchy",
        label: str | None = None,
    ):
        r"""Non-relativistic Breit-Wigner (Cauchy) PDF representing the energy distribution of a decaying particle.

        The (unnormalized) shape of the non-relativistic Breit-Wigner is given by

        .. math::

            \frac{1}{\gamma \left[1 + \left(\frac{x - m}{\gamma}\right)^2\right]}

        with :math:`m` the mean and :math:`\gamma` the width of the distribution.

        Args:
            m: Invariant mass of the unstable particle.
            gamma: Width of the shape.
            obs: Observables and normalization range the pdf is defined in
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        m, gamma = self._check_input_params_tfp(m, gamma)

        def _cauchy_kwargs():
            # `m` is used as the location and `gamma` as the scale of the tfp distribution.
            return {"loc": m.value(), "scale": gamma.value()}

        super().__init__(
            distribution=tfp.distributions.Cauchy,
            dist_params=_cauchy_kwargs,
            obs=obs,
            params={"m": m, "gamma": gamma},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class CauchyPDFRepr(BasePDFRepr):
    # Serialization representation of `Cauchy`: a type tag, the space and one field per parameter.
    _implementation = Cauchy
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["Cauchy"] = Field(default="Cauchy", alias="type")
    x: SpaceRepr
    m: Serializer.types.ParamTypeDiscriminated
    gamma: Serializer.types.ParamTypeDiscriminated
[docs]
class Poisson(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    @deprecated_args(None, "Use lam instead", "lamb")
    def __init__(
        self,
        lam: ztyping.ParamTypeInput = None,
        obs: ztyping.ObsTypeInput = None,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "Poisson",
        lamb=None,
        label: str | None = None,
    ):
        """Poisson distribution, parametrized with an event rate parameter (lam).

        The probability mass function of the Poisson distribution is given by

        .. math::

            f(x, \\lambda) = \\frac{\\lambda^{x}e^{-\\lambda}}{x!}

        Args:
            lam: the event rate
            obs: Observables and normalization range the pdf is defined in
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            lamb: Deprecated, use ``lam`` instead. If given (not ``None``), it is used
               in place of ``lam``.
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        # Backwards compatibility: the deprecated `lamb` overrides `lam` when it is given.
        if lamb is not None:
            lam = lamb
        del lamb

        (lam,) = self._check_input_params_tfp(lam)
        params = {"lam": lam}

        def dist_params():
            # Evaluated lazily: the parameter value is read whenever the kwargs are requested.
            return {"rate": lam.value()}

        distribution = tfp.distributions.Poisson
        super().__init__(
            distribution=distribution,
            dist_params=dist_params,
            obs=obs,
            params=params,
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class PoissonPDFRepr(BasePDFRepr):
    # Serialization representation of `Poisson`: a type tag, the space and the rate parameter.
    _implementation = Poisson
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["Poisson"] = Field(default="Poisson", alias="type")
    x: SpaceRepr
    lam: Serializer.types.ParamTypeDiscriminated
[docs]
class LogNormal(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        mu: ztyping.ParamTypeInput,
        sigma: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "LogNormal",
        label: str | None = None,
    ):
        r"""Log-normal distribution, the exponential of a normal distribution.

        The probability density function of the log-normal distribution is only defined for positive values and
        is given by

        .. math::

            f(x \mid \mu, \sigma) = \frac{1}{x \sigma \sqrt{2\pi}} e^{-\frac{(\ln(x) - \mu)^2}{2\sigma^2}}

        with :math:`\mu` the mean and :math:`\sigma` the standard deviation of the underlying normal distribution.

        Args:
            mu: Mean of the underlying normal distribution.
            sigma: Standard deviation of the underlying normal distribution.
            obs: Observables and normalization range the pdf is defined in
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        mu, sigma = self._check_input_params_tfp(mu, sigma)
        params = {"mu": mu, "sigma": sigma}

        def dist_params():
            # Evaluated lazily: the parameter values are read whenever the kwargs are requested.
            return {"loc": mu.value(), "scale": sigma.value()}

        distribution = tfp.distributions.LogNormal
        super().__init__(
            distribution=distribution,
            dist_params=dist_params,
            obs=obs,
            params=params,
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class LogNormalPDFRepr(BasePDFRepr):
    # Serialization representation of `LogNormal`: a type tag, the space and one field per parameter.
    _implementation = LogNormal
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["LogNormal"] = Field(default="LogNormal", alias="type")
    x: SpaceRepr
    mu: Serializer.types.ParamTypeDiscriminated
    sigma: Serializer.types.ParamTypeDiscriminated
[docs]
class ChiSquared(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        ndof: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "ChiSquared",
        label: str | None = None,
    ):
        """ChiSquared distribution for ndof degrees of freedom.

        The chisquared shape for `d` degrees of freedom is defined as

        .. math::

            f(x \\mid d) = x^{d/2 - 1} \\exp(-x/2) / Z

        with the normalization over [0, inf] of

        .. math::

            Z = 2^{d/2} \\Gamma(d/2)

        The normalization changes for different normalization ranges

        Args:
            ndof: Number of degrees of freedom
            obs: Observables and normalization range the pdf is defined in
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        (ndof,) = self._check_input_params_tfp(ndof)
        params = {"ndof": ndof}

        def dist_params():
            # `ndof` is passed to the tfp distribution as its `df` argument.
            return {"df": ndof.value()}

        distribution = tfp.distributions.Chi2
        super().__init__(
            distribution=distribution,
            dist_params=dist_params,
            obs=obs,
            params=params,
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class ChiSquaredPDFRepr(BasePDFRepr):
    # Serialization representation of `ChiSquared`: a type tag, the space and the ndof parameter.
    _implementation = ChiSquared
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["ChiSquared"] = Field(default="ChiSquared", alias="type")
    x: SpaceRepr
    ndof: Serializer.types.ParamTypeDiscriminated
[docs]
class StudentT(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        ndof: ztyping.ParamTypeInput,
        mu: ztyping.ParamTypeInput,
        sigma: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "StudentT",
        label: str | None = None,
    ):
        """StudentT distribution for ndof degrees of freedom.

        The StudentT shape for `d` degrees of freedom is defined as

        .. math::

            f(x \\mid d, \\mu, \\sigma) = \\left(1 + \\frac{1}{d} \\left(\\frac{x - \\mu}{\\sigma}\\right)^2\\right)^{-\\frac{d+1}{2}} / Z

        with the normalization over [-inf, inf] of

        .. math::

            Z = \\frac{\\sqrt{d \\pi} \\Gamma(\\frac{d}{2})}{\\Gamma(\\frac{d+1}{2})}

        The normalization changes for different normalization ranges

        Args:
            ndof: Number of degrees of freedom
            mu: Mean of the distribution
            sigma: Scale of the distribution
            obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.
               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.
               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        ndof, mu, sigma = self._check_input_params_tfp(ndof, mu, sigma)

        def _student_t_kwargs():
            # Evaluated lazily: the parameter values are read whenever the kwargs are requested.
            return {"df": ndof.value(), "loc": mu.value(), "scale": sigma.value()}

        super().__init__(
            distribution=tfp.distributions.StudentT,
            dist_params=_student_t_kwargs,
            obs=obs,
            params={"ndof": ndof, "mu": mu, "sigma": sigma},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class StudentTPDFRepr(BasePDFRepr):
    # Serialization representation of `StudentT`: a type tag, the space and one field per parameter.
    _implementation = StudentT
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["StudentT"] = Field(default="StudentT", alias="type")
    x: SpaceRepr
    ndof: Serializer.types.ParamTypeDiscriminated
    mu: Serializer.types.ParamTypeDiscriminated
    sigma: Serializer.types.ParamTypeDiscriminated
[docs]
class QGauss(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        q: ztyping.ParamTypeInput,
        mu: ztyping.ParamTypeInput,
        sigma: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "QGauss",
        label: str | None = None,
    ):
        """Q-Gaussian distribution with parameter `q`.

        The q-Gaussian is a probability distribution arising from the maximization of the Tsallis entropy under appropriate constraints.
        It is defined for q < 3 and the Gaussian distribution is recovered as q -> 1.
        For q < 1, it is the PDF of a bounded random variable.
        We only support 1 < q < 3 in this implementation.
        If you want to use exactly q = 1, use the `zfit.pdf.Gauss` class.
        During fitting, if you want to start from a Gaussian shape, you can initialize the `q` parameter to be really close to 1.
        It is related to the Student's t-distribution according to the `corresponding Wikipedia entry <https://en.wikipedia.org/wiki/Q-Gaussian_distribution#Student's_t-distribution>`_
        and that is how it is implemented here.

        The q-Gaussian shape for 1 < q < 3 is defined as

        .. math::

            f(x \\mid q, \\mu, \\sigma) = \\frac{1}{C_{q} \\sigma} e_{q}\\left(-\\left(\\frac{x - \\mu}{\\sigma}\\right)^{2}\\right)

        with

        .. math::

            e_q(x) = \\left[1 + (1 - q) x\\right]_{+}^{\\frac{1}{1 - q}}

        and the normalization over [-inf, inf] of

        .. math::

            C_{q} = \\frac{\\sqrt{\\pi} \\Gamma \\left(\\frac{3 - q}{2 (q - 1)}\\right)}{\\sqrt{q - 1}\\Gamma \\left(\\frac{1}{q - 1}\\right)}

        The normalization changes for different normalization ranges

        Args:
            q: Shape parameter of the q-Gaussian. Must be 1 < q < 3.
            mu: Mean of the distribution
            sigma: Scale of the distribution
            obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.
               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.
               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|

        Raises:
            ValueError: In eager mode, if `q` is not strictly between 1 and 3.
        """
        from zfit import run

        q, mu, sigma = self._check_input_params_tfp(q, mu, sigma)
        if run.executing_eagerly():
            if q < 1 or q > 3:
                msg = "q < 1 or q > 3 are not supported"
                raise ValueError(msg)
            if q == 1:
                msg = "q = 1 is a Gaussian, use Gauss instead."
                raise ValueError(msg)
            if q == 3:
                # df = (3 - q) / (q - 1) would be 0 and the scale would be divided by sqrt(0);
                # the graph-mode asserts below already require q < 3 strictly.
                msg = "q = 3 is not supported, q must be < 3."
                raise ValueError(msg)
        elif run.numeric_checks:
            tf.debugging.assert_greater(q, znp.asarray(1.0), "q must be > 1")
            tf.debugging.assert_less(q, znp.asarray(3.0), "q must be < 3")
        params = {"q": q, "mu": mu, "sigma": sigma}

        # https://en.wikipedia.org/wiki/Q-Gaussian_distribution
        # relation to Student's t-distribution
        # 1/(2 sigma^2) = 1 / (3 - q)
        # 2 sigma^2 = 3 - q
        # sigma = sqrt((3 - q)/2)
        def dist_params(q=q, mu=mu, sigma=sigma):
            # Re-check the bounds on every evaluation, as `q` may have changed since construction.
            if run.numeric_checks:
                tf.debugging.assert_greater(q, znp.asarray(1.0), "q must be > 1")
                tf.debugging.assert_less(q, znp.asarray(3.0), "q must be < 3")
            q_value = q.value()
            df = (3 - q_value) / (q_value - 1)
            scale = sigma.value() / tf.sqrt(0.5 * (3 - q_value))
            return {"df": df, "loc": mu.value(), "scale": scale}

        distribution = tfp.distributions.StudentT
        super().__init__(
            distribution=distribution,
            dist_params=dist_params,
            obs=obs,
            params=params,
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class QGaussPDFRepr(BasePDFRepr):
    # Serialization representation of `QGauss`: a type tag, the space and one field per parameter.
    _implementation = QGauss
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["QGauss"] = Field(default="QGauss", alias="type")
    x: SpaceRepr
    q: Serializer.types.ParamTypeDiscriminated
    mu: Serializer.types.ParamTypeDiscriminated
    sigma: Serializer.types.ParamTypeDiscriminated
[docs]
class BifurGauss(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        mu: ztyping.ParamTypeInput,
        sigmal: ztyping.ParamTypeInput,
        sigmar: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "BifurGauss",
        label: str | None = None,
    ):
        """Bifurcated Gaussian distribution with different standard deviations for the left and right side of the mean.

        The bifurcated Gaussian shape is defined as

        .. math::

            f(x \\mid \\mu, \\sigma_{L}, \\sigma_{R}) = \\begin{cases}
            A \\exp{\\left(-\\frac{(x - \\mu)^2}{2 \\sigma_{L}^2}\\right)}, & \\mbox{for } x < \\mu \\newline
            A \\exp{\\left(-\\frac{(x - \\mu)^2}{2 \\sigma_{R}^2}\\right)}, & \\mbox{for } x \\geq \\mu
            \\end{cases}

        with the normalization over [-inf, inf] of

        .. math::

            A = \\sqrt{\\frac{2}{\\pi}} \\frac{1}{\\sigma_{L} + \\sigma_{R}}

        The normalization changes for different normalization ranges

        Args:
            mu: Mean of the distribution
            sigmal: Standard deviation on the left side of the mean
            sigmar: Standard deviation for the right side of the mean
            obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.
               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.
               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        mu, sigmal, sigmar = self._check_input_params_tfp(mu, sigmal, sigmar)

        # Mapping onto the (scale, skewness) arguments used for TwoPieceNormal below:
        # sigmal = scale / skewness
        # sigmar = scale * skewness
        # scale = sigmal * skewness
        # sigmar = sigmal * skewness^2
        # skewness = sqrt(sigmar / sigmal)
        # scale = sigmal * sqrt(sigmar / sigmal)
        def _two_piece_kwargs():
            left = sigmal.value()
            right = sigmar.value()
            skewness = znp.sqrt(right / left)
            return {"loc": mu.value(), "scale": left * skewness, "skewness": skewness}

        super().__init__(
            distribution=tfp.distributions.TwoPieceNormal,
            dist_params=_two_piece_kwargs,
            obs=obs,
            params={"mu": mu, "sigmal": sigmal, "sigmar": sigmar},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class BifurGaussPDFRepr(BasePDFRepr):
    # Serialization representation of `BifurGauss`: a type tag, the space and one field per parameter.
    _implementation = BifurGauss
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["BifurGauss"] = Field(default="BifurGauss", alias="type")
    x: SpaceRepr
    mu: Serializer.types.ParamTypeDiscriminated
    sigmal: Serializer.types.ParamTypeDiscriminated
    sigmar: Serializer.types.ParamTypeDiscriminated
[docs]
class Gamma(WrapDistribution, SerializableMixin):
    _N_OBS = 1

    def __init__(
        self,
        gamma: ztyping.ParamTypeInput,
        beta: ztyping.ParamTypeInput,
        mu: ztyping.ParamTypeInput,
        obs: ztyping.ObsTypeInput,
        *,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "Gamma",
        label: str | None = None,
    ):
        """Gamma distribution.

        The gamma shape is parametrized here with `gamma`, `beta` and `mu`, following
        the same parametrization `as RooFit <https://root.cern.ch/doc/master/classRooGamma.html>`_.

        The gamma shape is defined as

        .. math::

            f(x \\mid \\gamma, \\beta, \\mu) = (x - \\mu)^{\\gamma - 1} \\exp{\\left(-\\frac{x - \\mu}{\\beta}\\right)} / Z

        with the normalization over [0, inf] of

        .. math::

            Z = \\Gamma(\\gamma) \\beta^{\\gamma}

        The normalization changes for different normalization ranges and `Z=1` for the unnormalized shape.

        Args:
            gamma: Shape parameter of the gamma distribution
            beta: Scale parameter of the gamma distribution
            mu: Shift of the distribution
            obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.
               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.
               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.
               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
        """
        gamma, beta, mu = self._check_input_params_tfp(gamma, beta, mu)

        def _shifted_gamma_kwargs():
            # The tfp Gamma takes a rate, so the scale `beta` enters as its inverse.
            return {"concentration": gamma.value(), "rate": 1 / beta.value(), "loc": mu.value()}

        def _shifted_gamma(concentration, rate, loc, name):
            # A Gamma distribution moved by `loc` through a Shift bijector.
            unshifted = tfp.distributions.Gamma(concentration, rate)
            shift_by_loc = tfp.bijectors.Shift(loc)
            return tfd.TransformedDistribution(
                distribution=unshifted,
                bijector=shift_by_loc,
                name=name,
            )

        super().__init__(
            distribution=_shifted_gamma,
            dist_params=_shifted_gamma_kwargs,
            obs=obs,
            params={"gamma": gamma, "beta": beta, "mu": mu},
            name=name,
            extended=extended,
            norm=norm,
            label=label,
        )
class GammaPDFRepr(BasePDFRepr):
    # Serialization representation of `Gamma`: a type tag, the space and one field per parameter.
    _implementation = Gamma
    # Literal type tag, exposed under the alias "type".
    hs3_type: Literal["Gamma"] = Field(default="Gamma", alias="type")
    x: SpaceRepr
    gamma: Serializer.types.ParamTypeDiscriminated
    beta: Serializer.types.ParamTypeDiscriminated
    mu: Serializer.types.ParamTypeDiscriminated