# Source code for zfit.core.basepdf

"""This  module defines the ``BasePdf`` that can be used to inherit from in order to build a custom PDF.

The ``BasePDF`` implements already a lot of ready-to-use functionality like integral, automatic normalization
and sampling.

Defining your own pdf
---------------------

A simple example:
>>> import numpy as np
>>> import zfit
>>> import zfit.z.numpy as znp
>>>
>>> class MyGauss(BasePDF):
>>>     def __init__(self, mean, stddev, name="MyGauss"):
>>>         super().__init__(mean=mean, stddev=stddev, name=name)
>>>
>>>     def _unnormalized_pdf(self, x):
>>>         mean = self.params["mean"]
>>>         stddev = self.params["stddev"]
>>>         return znp.exp(-((x - mean) ** 2) / (2 * stddev**2))

Notice that *here* we only specify the *function* and no normalization.
**No** attempt to **explicitly** normalize the function should be done inside ``_unnormalized_pdf``.
The normalization is handled with another method depending on the normalization range specified.
(It *is* possible, though discouraged, to directly provide the *normalized probability* by overriding ``_pdf()``, but
there are other, more convenient ways to add improvements, like providing analytical integrals.)

Before we create an instance, we need to create the variables to initialize it
>>> mean = zfit.Parameter("mean1", 2., 0.1, 4.2)  # signature as in RooFit: *name, initial, lower, upper*
>>> stddev = zfit.Parameter("stddev1", 5., 0.3, 10.)
Let's create an instance and some example data
>>> gauss = MyGauss(mean=mean, stddev=stddev)
>>> example_data = np.random.random(10)
Now we can get the probability
>>> probs = gauss.pdf(example_data)  # ``norm`` specifies over which range to normalize
Or the integral
>>> integral = gauss.integrate(limits=(-5, 3.1), norm=False)  # norm is False -> return the unnormalized integral
Or directly sample from it
>>> sample = gauss.sample(n_draws=1000, limits=(-10, 10))  # draw 1000 samples within (-10, 10)

We can create an extended PDF, which will result in anything using a ``norm_range`` to not return the
probability but the number probability (the function will be normalized to ``yield`` instead of 1 inside
the ``norm_range``)
>>> yield1 = zfit.Parameter("yield1", 100, 0, 1000)
>>> gauss_extended = gauss.create_extended(yield1)
>>> gauss_extended.is_extended
True

>>> integral_extended = gauss_extended.ext_integrate(limits=(-10, 10), norm=(-10, 10))  # yields approx 100

For more advanced methods and ways to register analytic integrals or overwrite certain methods, see
also the advanced models in `zfit models <https://github.com/zfit/zfit-tutorials>`_
"""

#  Copyright (c) 2024 zfit

from __future__ import annotations

from typing import TYPE_CHECKING, Iterable, Optional

from tensorflow.python.util.deprecation import deprecated_args

from ..util.plotter import PDFPlotter
from ..util.ztyping import ExtendedInputType, NormInputType

if TYPE_CHECKING:
    pass

import warnings
from contextlib import suppress

import tensorflow as tf

import zfit.z.numpy as znp
from zfit import z

from ..settings import run, ztypes
from ..util import ztyping
from ..util.cache import invalidate_graph
from ..util.deprecation import deprecated, deprecated_norm_range
from ..util.exception import (
    AlreadyExtendedPDFError,
    BreakingAPIChangeError,
    FunctionNotImplemented,
    NormNotImplemented,
    NotExtendedPDFError,
    SpecificFunctionNotImplemented,
)
from ..util.temporary import TemporarilySet
from .basemodel import BaseModel
from .baseobject import extract_filter_params
from .interfaces import ZfitParameter, ZfitPDF, ZfitSpace
from .parameter import Parameter, convert_to_parameter
from .sample import extended_sampling
from .space import Space, convert_to_space

# Registry mapping a method name to a flag: ``True`` if an overriding subclass method
# has to use the ``@supports`` decorator, ``False`` if it must not use it. It is filled
# by ``_BasePDF_register_check_support`` and handed to ``_subclass_check_support`` in
# ``BasePDF.__init_subclass__``.
_BasePDF_USER_IMPL_METHODS_TO_CHECK = {}


def _BasePDF_register_check_support(has_support: bool):
    """Build a decorator that records whether a method needs the ``@supports`` decorator.

    Args:
        has_support: ``True`` flags that a subclass implementation **requires** the
            ``@supports`` decorator, ``False`` flags that the decorator is **not allowed**.

    Returns:
        A decorator that registers the decorated method and returns the same function object.

    Raises:
        TypeError: If ``has_support`` is not a ``bool``.
    """
    if isinstance(has_support, bool):

        def register(func):
            """Store ``func`` under its name in the registry and tag it.

            Args:
                func: The method to register.

            Returns:
                Function: The very same ``func``, with ``__wrapped__`` set.
            """
            _BasePDF_USER_IMPL_METHODS_TO_CHECK[func.__name__] = has_support
            # Tag the base implementation; presumably ``_subclass_check_support`` uses this
            # marker to tell a non-overridden method from a user implementation.
            func.__wrapped__ = _BasePDF_register_check_support
            return func

        return register

    msg = "Has to be boolean."
    raise TypeError(msg)


class PDFMeta(type):
    """Metaclass that wraps a PDF in its binned version if it is created with a binned ``obs``."""

    def __call__(cls, *args, obs=None, **kwargs):
        """Instantiate ``cls``, converting the result with ``to_binned`` for a binned ``obs``.

        Args:
            *args: Positional arguments forwarded to ``__init__``.
            obs: Observables given by keyword. If it is a ``Space`` with a binning, the
                instance is created with the binning removed and converted afterwards.
            **kwargs: Keyword arguments forwarded to ``__init__``; ``extended``, ``norm``,
                ``name`` and ``label`` are additionally passed on to ``to_binned``.

        Returns:
            The new instance or, for a binned ``obs``, the result of its ``to_binned``.
        """
        is_binned = obs is not None and isinstance(obs, Space) and obs.binning is not None
        binned_space = obs if is_binned else None
        if binned_space is not None:
            # the unbinned PDF itself is built on the space without binning
            obs = binned_space.with_binning(None)
        if obs is not None:
            kwargs["obs"] = obs

        instance = cls.__new__(cls)
        instance.__init__(*args, **kwargs)
        if binned_space is None:
            return instance

        forwarded = {key: kwargs.get(key) for key in ("extended", "norm", "name", "label")}
        return instance.to_binned(binned_space, **forwarded)


class BasePDF(ZfitPDF, BaseModel, metaclass=PDFMeta):
    """Base class to inherit from in order to build a custom PDF.

    It adds the normalization handling, the ``pdf``/``log_pdf`` family (including the
    extended ``ext_*`` variants) and the yield management on top of ``BaseModel``.
    """

    def __init__(
        self,
        obs: ztyping.ObsTypeInput,
        params: dict[str, ZfitParameter] | None = None,
        *,
        dtype=ztypes.float,
        label=None,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
        name: str = "BasePDF",
        **kwargs,
    ):
        """Initialize the PDF.

        Args:
            obs: Observables, forwarded to the parent initializer.
            params: Mapping of names to parameters, forwarded to the parent initializer.
            dtype: Data type, forwarded to the parent initializer.
            label: Label of the PDF; if falsy, ``name`` is used.
            extended: Yield of the PDF. Anything but ``None`` and ``False`` is set as yield.
            norm: Normalization of the PDF; if ``None``, ``norm`` falls back to ``space``.
            name: Name of the PDF.
            **kwargs: Forwarded to the parent initializer.
        """
        # Initialized before ``super().__init__``; ``plot`` is only replaced at the end if
        # it is still ``None`` afterwards.
        self._yield = None
        self.plot = None
        super().__init__(obs=obs, dtype=dtype, name=name, params=params, **kwargs)

        self._label = label or self.name
        self._norm = norm
        wants_yield = not (extended is None or extended is False)
        if wants_yield:
            self._set_yield(extended)
        self._assert_params_unique()
        if self.plot is None:
            self.plot = PDFPlotter(self)

    def __init_subclass__(cls, **kwargs):
        """Run ``_subclass_check_support`` for the registered methods on every new subclass."""
        super().__init_subclass__(**kwargs)
        cls._subclass_check_support(
            wrapper_not_overwritten=_BasePDF_register_check_support,
            methods_to_check=_BasePDF_USER_IMPL_METHODS_TO_CHECK,
        )

    def _check_input_norm(self, norm, none_is_error=False):
        """Check ``norm`` with the parent method, using the PDF's ``norm`` if ``None`` is given."""
        effective_norm = self.norm if norm is None else norm
        return super()._check_input_norm(norm=effective_norm, none_is_error=none_is_error)

    def _check_input_params_tfp(self, *params):
        """Convert every given value with ``convert_to_parameter`` and return them as a tuple."""
        return tuple(map(convert_to_parameter, params))

    def _func_to_integrate(self, x: ztyping.XType):
        """Function used for integration: the unnormalized pdf."""
        return self.pdf(x, norm=False)

    def _func_to_sample_from(self, x):
        """Function used for sampling: the unnormalized pdf."""
        return self.pdf(x, norm=False)

    @property
    def label(self):
        """Label of the PDF (the name if no label was given)."""
        return self._label

    @property
    @deprecated(None, "Use the `norm` attribute instead.")
    def norm_range(self) -> Space | None | bool:
        """Deprecated alias of :py:attr:`norm`.

        Returns:
            The current normalization range.
        """
        return self.norm

    @property
    def norm(self) -> Space | None | bool:
        """Return the current normalization range.

        If no normalization was set (``None``), the ``space`` of the PDF is returned.

        Returns:
            The current normalization range.
        """
        return self.space if self._norm is None else self._norm
[docs] @invalidate_graph @deprecated(None, "Prefer to create a new PDF with `norm` set or wrap the existing in a `TruncatedPDF`.") def set_norm_range(self, norm: ztyping.LimitsTypeInput): """Set the normalization range (temporarily if used with contextmanager). Args: norm: """ norm = self._check_input_norm(norm) def setter(value): self._norm = value def getter(): return self._norm return TemporarilySet(value=norm, setter=setter, getter=getter)
@_BasePDF_register_check_support(True) def _normalization(self, norm, options, *, params=None): # noqa: ARG002 raise SpecificFunctionNotImplemented
[docs] @deprecated_args(None, "Use `norm` instead.", "limits") def normalization( self, norm: ztyping.LimitsType = None, *, options=None, limits: ztyping.LimitsType = None, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Return the normalization of the function (usually the integral over ``norm``). Args: norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| options: |@doc:pdf.param.options||@docend:pdf.param.options| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. |@docend:model.args.params| Returns: The normalization value """ del limits if options is None: options = {} norm = self._check_input_norm(norm) with self._check_set_input_params(params=params): return self._single_hook_normalization(norm=norm, options=options)
def _single_hook_normalization(self, norm, options): # TODO(Mayou36): add yield? return self._hook_normalization(norm=norm, options=options) def _hook_normalization(self, norm, options): return self._call_normalization(norm=norm, options=options) # no _norm_* needed def _call_normalization(self, norm, options): # TODO: caching? alternative with suppress(FunctionNotImplemented): return self._normalization(norm=norm, options=options) return self._fallback_normalization(norm, options=options) def _fallback_normalization(self, norm, options): return self._hook_integrate(limits=norm, norm=False, options=options) def _unnormalized_pdf(self, x, *, params=None): # noqa: ARG002 raise SpecificFunctionNotImplemented
[docs] @deprecated(None, "Use `pdf(norm=False)` instead") def unnormalized_pdf(self, x: ztyping.XType) -> ztyping.XType: """PDF "unnormalized". Use ``functions`` for unnormalized pdfs. this is only for performance in special cases. Args: x: |@doc:pdf.param.x| Data to evaluate the method on. Should be ``ZfitData`` or a mapping of *obs* to numpy-like arrays. If an array is given, the first dimension is interpreted as the events while the second is meant to be the dimensionality of a single event. |@docend:pdf.param.x| Returns: 1-dimensional :py:class:`tf.Tensor` containing the unnormalized pdf. """ with self._convert_sort_x(x) as x: return self._single_hook_unnormalized_pdf(x)
def _single_hook_unnormalized_pdf(self, x): return self._call_unnormalized_pdf(x=x) def _call_unnormalized_pdf(self, x): # try: return self._unnormalized_pdf(x) @z.function(wraps="model") @deprecated_norm_range def ext_pdf( self, x: ztyping.XTypeInput, norm: ztyping.LimitsTypeInput = None, *, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Probability density function scaled by yield, normalized over ``norm_range``. Args: x: |@doc:pdf.param.x| Data to evaluate the method on. Should be ``ZfitData`` or a mapping of *obs* to numpy-like arrays. If an array is given, the first dimension is interpreted as the events while the second is meant to be the dimensionality of a single event. |@docend:pdf.param.x| norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. |@docend:model.args.params| Returns: :py:class:`tf.Tensor` of type `self.dtype`. 
""" norm = self._check_input_norm(norm, none_is_error=True) if not self.is_extended: msg = f"{self} is not extended, cannot call `ext_pdf`" raise NotExtendedPDFError(msg) with self._convert_sort_x(x) as x, self._check_set_input_params(params=params): return self._call_ext_pdf(x, norm) def _call_ext_pdf(self, x, norm): with suppress(SpecificFunctionNotImplemented): return self._auto_ext_pdf(x, norm) # fallback return self.pdf(x=x, norm=norm) * self.get_yield() def _auto_ext_pdf(self, x, norm): try: probs = self._ext_pdf(x, norm) except NormNotImplemented: unnorm_probs = self._ext_pdf(x, False) normalization = self.normalization(norm) probs = unnorm_probs / normalization return probs @_BasePDF_register_check_support(True) def _ext_pdf(self, x, norm, *, norm_range=None, params=None): # noqa: ARG002 raise SpecificFunctionNotImplemented # TODO: implement properly @z.function(wraps="model") @deprecated_norm_range def ext_log_pdf( self, x: ztyping.XTypeInput, norm: ztyping.LimitsTypeInput = None, *, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Log of probability density function scaled by yield, normalized over ``norm_range``. Args: x: |@doc:pdf.param.x| Data to evaluate the method on. Should be ``ZfitData`` or a mapping of *obs* to numpy-like arrays. If an array is given, the first dimension is interpreted as the events while the second is meant to be the dimensionality of a single event. |@docend:pdf.param.x| norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. 
|@docend:model.args.params| Returns: :py:class:`tf.Tensor` of type `self.dtype`. """ norm = self._check_input_norm(norm, none_is_error=True) if not self.is_extended: msg = f"{self} is not extended, cannot call `ext_pdf`" raise NotExtendedPDFError(msg) with self._convert_sort_x(x) as x, self._check_set_input_params(params=params): return self._call_ext_log_pdf(x, norm) def _call_ext_log_pdf(self, x, norm): with suppress(SpecificFunctionNotImplemented): return self._auto_ext_log_pdf(x, norm) # fallback return self.log_pdf(x=x, norm=norm) + znp.log(self.get_yield()) def _auto_ext_log_pdf(self, x, norm): try: pdf = self._ext_log_pdf(x, norm) except NormNotImplemented: unnormed_pdf = self._ext_log_pdf(x, False) normalization = self.log_normalization(norm) pdf = unnormed_pdf - normalization return pdf @_BasePDF_register_check_support(True) def _ext_log_pdf(self, x, norm): # noqa: ARG002 raise SpecificFunctionNotImplemented @_BasePDF_register_check_support(True) def _pdf(self, x, norm, *, norm_range=None, params=None): # noqa: ARG002 raise SpecificFunctionNotImplemented @z.function(wraps="model") @deprecated_norm_range def pdf( self, x: ztyping.XTypeInput, norm: ztyping.LimitsTypeInput = None, *, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Probability density function of ``x``, normalized over ``norm``. Args: x: |@doc:pdf.param.x| Data to evaluate the method on. Should be ``ZfitData`` or a mapping of *obs* to numpy-like arrays. If an array is given, the first dimension is interpreted as the events while the second is meant to be the dimensionality of a single event. |@docend:pdf.param.x| norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. 
The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. |@docend:model.args.params| Returns: :py:class:`tf.Tensor` of type `self.dtype`. """ norm = self._check_input_norm(norm, none_is_error=True) with self._convert_sort_x(x) as x, self._check_set_input_params(params=params): value = self._single_hook_pdf(x=x, norm=norm) if run.numeric_checks: z.check_numerics(value, message="Check if pdf output contains any NaNs of Infs") return znp.atleast_1d(znp.asarray(z.to_real(value))) def _single_hook_pdf(self, x, norm): return self._hook_pdf(x=x, norm=norm) def _hook_pdf(self, x, norm): return self._norm_pdf(x=x, norm=norm) def _norm_pdf(self, x, norm): try: return self._call_pdf(x=x, norm=norm) except NormNotImplemented: unnormed_pdf = self._call_pdf(x=x, norm=False) normalization = self.normalization(norm) return unnormed_pdf / normalization def _call_pdf(self, x, norm): with suppress(FunctionNotImplemented): return self._pdf(x, norm) with suppress(FunctionNotImplemented): return znp.exp(self._log_pdf(x, norm)) if self.is_extended: with suppress(FunctionNotImplemented): return self._ext_pdf(x, norm) / self.get_yield() # TODO: extend/refactor the calling return self._fallback_pdf(x, norm) def _fallback_pdf(self, x, norm): pdf = self._call_unnormalized_pdf(x) if norm.has_limits: pdf /= self._hook_normalization(norm=norm, options={}) return pdf @_BasePDF_register_check_support(True) @deprecated_norm_range def _log_pdf(self, x, norm): # noqa: ARG002 raise SpecificFunctionNotImplemented
[docs] @deprecated_norm_range def log_pdf( self, x: ztyping.XType, norm: ztyping.LimitsType = None, *, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Log probability density function normalized over ``norm_range``. Args: x: |@doc:pdf.param.x| Data to evaluate the method on. Should be ``ZfitData`` or a mapping of *obs* to numpy-like arrays. If an array is given, the first dimension is interpreted as the events while the second is meant to be the dimensionality of a single event. |@docend:pdf.param.x| norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. |@docend:model.args.params| Returns: A ``Tensor`` of type ``self.dtype``. """ norm = self._check_input_norm(norm) with self._convert_sort_x(x) as x, self._check_set_input_params(params=params): return znp.asarray(z.to_real(self._single_hook_log_pdf(x=x, norm=norm)))
def _single_hook_log_pdf(self, x, norm): return self._hook_log_pdf(x=x, norm=norm) def _hook_log_pdf(self, x, norm): return self._norm_log_pdf(x=x, norm=norm) def _norm_log_pdf(self, x, norm): try: return self._call_log_pdf(x=x, norm=norm) except NormNotImplemented: unnormed_log_pdf = self._call_log_pdf(x=x, norm=False) normalization = self.log_normalization(norm) return unnormed_log_pdf - normalization def _call_log_pdf(self, x, norm): with suppress(FunctionNotImplemented): return self._log_pdf(x, norm) with suppress(FunctionNotImplemented): return znp.log(self._pdf(x, norm)) return self._fallback_log_pdf(x, norm) def _fallback_log_pdf(self, x, norm): return znp.log(self._hook_pdf(x=x, norm=norm)) @_BasePDF_register_check_support(True) @deprecated_norm_range def _log_normalization(self, norm, *, params=None, options): # noqa: ARG002 raise SpecificFunctionNotImplemented
[docs] def log_normalization( self, norm: ztyping.LimitsType, *, options=None, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Return the normalization of the function (usually the integral over ``norm``). Args: norm: |@doc:pdf.param.norm| Normalization of the function. By default, this is the ``norm`` of the PDF (which by default is the same as the space of the PDF). Should be ``ZfitSpace`` to define the space to normalize over. |@docend:pdf.param.norm| options: |@doc:pdf.param.options||@docend:pdf.param.options| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. |@docend:model.args.params| Returns: The normalization value """ if options is None: options = {} norm = self._check_input_norm(norm, none_is_error=True) with self._check_set_input_params(params=params): return self._single_hook_log_normalization(norm=norm, options=options)
def _single_hook_log_normalization(self, norm, options): # TODO(Mayou36): add yield? return self._hook_normalization(norm=norm, options=options) def _hook_log_normalization(self, norm, options): return self._call_normalization(norm=norm, options=options) # no _norm_* needed def _call_log_normalization(self, norm, options): # TODO: caching? alternative with suppress(FunctionNotImplemented): return self._normalization(norm=norm, options=options) return self._fallback_normalization(norm, options=options) def _fallback_log_normalization(self, norm, options): return znp.log(self._hook_normalization(norm=norm, options=options)) @z.function(wraps="model") @deprecated_norm_range def ext_integrate( self, limits: ztyping.LimitsType, norm: ztyping.LimitsType = None, *, options=None, params: ztyping.ParamsTypeOpt = None, ) -> ztyping.XType: """Integrate the function over ``limits`` (normalized over ``norm_range`` if not False). Args: limits: |@doc:pdf.integrate.limits| Limits of the integration. |@docend:pdf.integrate.limits| norm: |@doc:pdf.integrate.norm| Normalization of the integration. By default, this is the same as the default space of the PDF. ``False`` means no normalization and returns the unnormed integral. |@docend:pdf.integrate.norm| options: |@doc:pdf.integrate.options| Options for the integration. Additional options for the integration. Currently supported options are: - type: one of (``bins``) This hints that bins are integrated. A method that is vectorizable, non-dynamic and therefore less suitable for complicated functions is chosen. |@docend:pdf.integrate.options| params: |@doc:model.args.params| Mapping of the parameter names to the actual values. The parameter names refer to the names of the parameters, typically :py:class:`~zfit.Parameter`, that the model was _initialized_ with, not the name of the models parametrization. 
|@docend:model.args.params| Returns: The integral value as a scalar with shape () """ if options is None: options = {} norm = self._check_input_norm(norm) limits = self._check_input_limits(limits=limits) if not self.is_extended: msg = f"{self} is not extended, cannot call `ext_pdf`" raise NotExtendedPDFError(msg) with self._check_set_input_params(params=params): return self.integrate(limits=limits, norm=norm, options=options) * self.get_yield() def _apply_yield(self, value: float, norm: ztyping.LimitsType, log: bool) -> float | tf.Tensor: if self.is_extended and not norm.limits_are_false: if log: value += znp.log(self.get_yield()) else: value *= self.get_yield() return value @deprecated(None, "Use the public `set_yield` instead.") def _set_yield_inplace(self, value: ZfitParameter | float | None): """Make the model extended by setting a yield. This does not alter the general behavior of the PDF. If there is a ``norm`` given, the output of the above functions does not represent a normalized probability density function anymore but corresponds to a number probability. Args: value: """ self._set_yield(value=value)
[docs] def create_extended( self, yield_: ztyping.ParamTypeInput, name: str | None = None, *, name_addition: str | None = None, ) -> ZfitPDF: """Return an extended version of this pdf with yield ``yield_``. The parameters are shared. Args: yield_: |@doc:pdf.param.yield| Yield (expected number of events) of the PDF. This is the expected number of events. If this is parameter-like, it will be used as the yield, the expected number of events, and the PDF will be extended. An extended PDF has additional functionality, such as the ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.param.yield| name: New name of the PDF. If ``None``, the name of the PDF with a trailing "_ext" is used. Returns: :py:class:`~zfit.core.interfaces.ZfitPDF`: a new PDF that is extended """ # TODO(Mayou36): fix copy if name_addition is not None: msg = "name_addition is not supported anymore, use `name` instead." raise BreakingAPIChangeError(msg) from zfit.models.functor import ProductPDF name = f"{self.name}_ext" if name is None else name if isinstance(self, ProductPDF): warnings.warn( "As `copy` is not yet properly implemented, this may fails (for ProductPDF for example?). This" "will be fixed in the future.", category=UserWarning, stacklevel=2, ) if self.is_extended: msg = "This PDF is already extended, cannot create an extended one." raise AlreadyExtendedPDFError(msg) try: new_pdf = self.copy(name=name) except Exception as error: msg = ( f"PDF {self} could not be copied, therefore `create_extended` failed and a new " f"extended PDF cannot be created. As an alternative, you can use `set_yield`" f" to set the yield on the current PDF *inplace* (this won't return a new PDF but" f" instead modify the existing)." ) raise RuntimeError(msg) from error new_pdf.set_yield(value=yield_) return new_pdf
[docs] def set_yield(self, value): """Make the model extended **inplace** by setting a yield. If possible, prefer to use ``create_extended``. This does not alter the general behavior of the PDF. The ``pdf`` and ``integrate`` and similar methods will continue to return the same - normalized to 1 - values. However, not only can this parameter be accessed via ``get_yield``, the methods ``ext_pdf`` and ``ext_integral`` provide a version of ``pdf`` and ``integrate`` respecetively that is multiplied by the yield. These can be useful for plotting and for binned likelihoods. Args: value: |@doc:pdf.param.yield| Yield (expected number of events) of the PDF. This is the expected number of events. If this is parameter-like, it will be used as the yield, the expected number of events, and the PDF will be extended. An extended PDF has additional functionality, such as the ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.param.yield| """ self._set_yield(value=value)
def _set_yield(self, value: ztyping.ParamTypeInput): if value is None: msg = "Cannot unset a yield (anymore)." raise BreakingAPIChangeError(msg) if self.is_extended: msg = f"Cannot extend {self}, is already extended." raise AlreadyExtendedPDFError(msg) value = convert_to_parameter(value) self.add_cache_deps(value) self._yield = value # not ideal, should be in parametrized. But we don't have too many base classes, so this should work self._assert_params_unique() @property def is_extended(self) -> bool: """Flag to tell whether the model is extended or not. Returns: A boolean. """ return self._yield is not None def _hook_sample(self, limits, n): if n is None and self.is_extended: n = "extended" if isinstance(n, str) and n == "extended": if not self.is_extended: msg = "Cannot use 'extended' as value for `n` on a non-extended pdf." raise NotExtendedPDFError(msg) samples = extended_sampling(pdfs=self, limits=limits) elif isinstance(n, str): msg = "`n` is a string and not 'extended'. Other options are currently not implemented." raise ValueError(msg) elif n is None: msg = "`n` cannot be `None` if pdf is not extended." raise tf.errors.InvalidArgumentError(msg) else: samples = super()._hook_sample(limits=limits, n=n) return samples
[docs] def get_yield(self) -> Parameter | None: """Return the yield (only for extended models). Returns: The yield of the current model or None """ return self._yield
@property def extended(self) -> Parameter | None: """Return the yield (only for extended models). Returns: The yield of the current model or None """ return self.get_yield() def _get_params( self, floating: bool | None = True, is_yield: bool | None = None, extract_independent: bool | None = True, ) -> set[ZfitParameter]: params = super()._get_params(floating, is_yield=is_yield, extract_independent=extract_independent) if is_yield is not False: if self.is_extended: yield_params = extract_filter_params( self.get_yield(), floating=floating, extract_independent=extract_independent, ) yield_params.update(params) # putting the yields at the beginning params = yield_params elif is_yield is True: msg = "PDF is not extended but only yield parameters were requested." raise NotExtendedPDFError(msg) return params
    def create_projection_pdf(
        self,
        *,
        limits: ztyping.LimitsTypeInput = None,
        obs: ztyping.LimitsTypeInput = None,
        options=None,
        name: str | None = None,
        label: str | None = None,
        extended: ExtendedInputType = None,
        norm: NormInputType = None,
    ) -> ZfitPDF:
        """Create a PDF projection by integrating out some dimensions.

        The new projection pdf is still fully dependent on the pdf it was created with.

        Args:
            limits: Limits of the integration to project out. If not given, all observables that are not in `obs`
                are projected on using the default limits of the observables.
            obs: Observables to project on. If not given, all observables that are not in `limits` are projected on.
            options: |@doc:pdf.integrate.options| Options for the integration.
                Additional options for the integration. Currently supported options are:
                - type: one of (``bins``)
                  This hints that bins are integrated. A method that is vectorizable,
                  non-dynamic and therefore less suitable for complicated functions is chosen. |@docend:pdf.integrate.options|
            name: Name passed on to the created PDF.
            label: Label of the created PDF. If ``None``, it is built from the label of this
                PDF and the first observable that is projected on.
            extended: Yield passed on to the created PDF. If ``None``, it is taken to be
                whether this PDF is extended; ``True`` is replaced by the yield of this PDF.
            norm: Normalization passed on to the created PDF.

        Returns:
            A pdf without the dimensions from ``limits``.

        Raises:
            ValueError: If neither ``limits`` nor ``obs`` is given, if ``obs`` contains all
                observables or if ``obs`` and ``limits`` are not disjoint.
        """
        from ..models.special import SimpleFunctorPDF

        if limits is None:
            # only ``obs`` given: integrate over every observable that is not projected on
            if obs is None:
                msg = "Either `limits` or `obs` have to be given."
                raise ValueError(msg)
            obs = convert_to_space(obs)
            limit_obs = [ob for ob in self.obs if ob not in obs.obs]
            if not limit_obs:
                msg = f"No observables to integrate out: `obs` contains all observables {obs}."
                raise ValueError(msg)
            limits = self.space.with_obs(limit_obs)
            if not obs.has_limits:
                # take the limits of these observables from the space of this PDF
                obs = self.space.with_obs(obs.obs)
        else:
            limits = convert_to_space(limits)
            if not limits.has_limits:
                limits = self.space.with_obs(limits.obs)
            if obs is None:
                # project on everything that is not integrated over
                obs = self.space.with_obs([ob for ob in self.obs if ob not in limits.obs])
            else:
                obs = convert_to_space(obs)
                if not obs.has_limits:
                    obs = self.space.with_obs(obs.obs)
        if not set(obs.obs).isdisjoint(limits.obs):
            msg = (
                f"The `obs` to project on ({obs}) and the `limits` to integrate over ({limits}) "
                "have to be disjoint."
            )
            raise ValueError(msg)

        def partial_integrate_wrapped(self_simple, x):
            # the functor instance is not needed, the integral is taken on this PDF
            del self_simple
            return self.partial_integrate(
                x, limits=limits, options=options, norm=False
            )  # todo: it should be fine not to normalize, right?

        if label is None:
            label = f"{self.label}_projon_{obs.obs[0]}"
        if extended is None:
            extended = self.is_extended
        if extended is True:
            extended = self.get_yield()
        return SimpleFunctorPDF(
            obs=obs,
            pdfs=(self,),
            func=partial_integrate_wrapped,
            name=name,
            label=label,
            extended=extended,
            norm=norm,
        )
[docs] def copy(self, **override_parameters) -> BasePDF: """Creates a copy of the model. Note: the copy model may continue to depend on the original initialization arguments. Args: **override_parameters: String/value dictionary of initialization arguments to override with new value. Returns: A new instance of `type(self)` initialized from the union of self.parameters and override_parameters, i.e., `dict(self.parameters, **override_parameters)`. """ obs = self.norm # HACK(Mayou36): remove once copy is proper implemented from ..models.dist_tfp import WrapDistribution from ..models.kde import GaussianKDE1DimV1 from ..models.polynomials import RecursivePolynomial if type(self) == WrapDistribution: # NOT isinstance! Because e.g. Gauss wraps that and takes different args parameters = {"distribution": self._distribution, "dist_params": self.dist_params} else: # HACK END parameters = dict(self.params) lam = parameters.pop("lambda", None) if lam is not None: parameters["lam"] = lam if type(self) == GaussianKDE1DimV1: msg = ( "Cannot copy `GaussianKDE1DimV1` (yet). If you tried to make it extended, use " "`set_yield`" " instead and set it inplace." 
) raise RuntimeError(msg) parameters["data"] = self._original_data # HACK(Mayou36): copy the polynomial correct, replace 'c_0' with coeff0/coeff_0 or similar if isinstance(self, RecursivePolynomial): parameters["coeff0"] = parameters.pop("c_0", None) coeffs = [] i_coeff = 1 # collect coeffs and convert to 'coeff' list while True: coeff_name = f"c_{i_coeff}" try: coeff = parameters.pop(coeff_name) except KeyError: break else: coeffs.append(coeff) i_coeff += 1 parameters["coeffs"] = coeffs from zfit.models.functor import BaseFunctor, SumPDF if isinstance(self, BaseFunctor): parameters = {} if isinstance(self, SumPDF): fracs = self.fracs if not self.is_extended: fracs = fracs[:-1] parameters.update(fracs=fracs) parameters.update(pdfs=self.pdfs) parameters.update(obs=obs, name=self.name) parameters.update(**override_parameters) # if hasattr(self, "distribution"): # parameters.update(distribution=self.distribution) yield_ = parameters.pop("yield", None) new_instance = type(self)(**parameters) if yield_ is not None: new_instance.set_yield(yield_) return new_instance
[docs] @deprecated_norm_range def as_func(self, norm: ztyping.LimitsType = False): """Return a `Function` with the function `model(x, norm=norm)`. Args: norm: If not False or a `ZfitSpace`, this will be used to call the `pdf` function. """ from .operations import convert_pdf_to_func # prevent circular import return convert_pdf_to_func(pdf=self, norm=norm)
def __str__(self):
    """Return the class name followed by the label, separated by a single space."""
    class_name = type(self).__name__
    label = self.label
    return f"{class_name} {label}"
def to_unbinned(self):
    """Convert to an unbinned pdf.

    Returns:
        ``self``, unchanged: this implementation performs no conversion.
    """
    unbinned = self
    return unbinned
def to_binned(
    self,
    space: ztyping.SpaceType,
    extended: ExtendedInputType = None,
    norm: NormInputType = None,
    name: Optional[str] = None,
    label: Optional[str] = None,
):
    """Convert to a binned pdf by wrapping this pdf in a ``BinnedFromUnbinnedPDF``.

    All arguments are forwarded unchanged to ``BinnedFromUnbinnedPDF``.

    Args:
        space: Passed on as ``space`` of the binned wrapper.
        extended: Passed on as ``extended`` of the binned wrapper.
        norm: Passed on as ``norm`` of the binned wrapper.
        name: Passed on as ``name`` of the binned wrapper.
        label: Passed on as ``label`` of the binned wrapper.

    Returns:
        A ``BinnedFromUnbinnedPDF`` that wraps this pdf.
    """
    from ..models.tobinned import BinnedFromUnbinnedPDF

    wrapper_kwargs = {
        "pdf": self,
        "space": space,
        "extended": extended,
        "norm": norm,
        "name": name,
        "label": label,
    }
    return BinnedFromUnbinnedPDF(**wrapper_kwargs)
def to_truncated(
    self,
    limits: ZfitSpace | Iterable[ZfitSpace] | None = None,
    *,
    obs=None,
    extended=None,
    norm=None,
    name: str | None = None,
    label: str | None = None,
):
    """Convert the PDF to a truncated version with possibly different and multiple limits.

    The arguments are the same as for :py:class:`~zfit.pdf.TruncatedPDF`, the only difference being
    that if no limits are given, the limit of the PDF is used, thereby truncating the PDF to its
    original limits.

    Args:
        limits: The limits to truncate the PDF. Can be a single limit or multiple limits.
            If not given, ``obs`` is used when provided, otherwise the space of this PDF.
        obs: |@doc:pdf.init.obs| Observables of the
               model. This will be used as the default space of the PDF and,
               if not given explicitly, as the normalization range.

               The default space is used for example in the sample method: if no
               sampling limits are given, the default space is used.

               If the observables are binned and the model is unbinned, the
               model will be a binned model, by wrapping the model in a
               :py:class:`~zfit.pdf.BinnedFromUnbinnedPDF`, equivalent to
               calling :py:meth:`~zfit.pdf.BasePDF.to_binned`.

               The observables are not equal to the domain as it does not restrict or
               truncate the model outside this range. |@docend:pdf.init.obs|
            If not given, the space of this PDF is used.
        extended: |@doc:pdf.init.extended| The overall yield of the PDF.
               If this is parameter-like, it will be used as the yield,
               the expected number of events, and the PDF will be extended.
               An extended PDF has additional functionality, such as the
               ``ext_*`` methods and the ``counts`` (for binned PDFs). |@docend:pdf.init.extended|
            If None, the PDF will be extended if the original PDF is extended.
            If ``True`` and the original PDF is extended, the yield will be scaled to the fraction of
            the total integral that is within the limits.
            Therefore, the overall yield is comparable, i.e. the pdfs can be plotted
            "on top of each other".
        norm: |@doc:pdf.init.norm| Normalization of the PDF.
               By default, this is the same as the default space of the PDF. |@docend:pdf.init.norm|
            If not given, the ``norm`` of this PDF is used.
        name: |@doc:pdf.init.name| Name of the PDF.
               Maybe has implications on the serialization and deserialization of the PDF.
               For a human-readable name, use the label. |@docend:pdf.init.name|
            If not given, the name of this PDF with ``"_truncated"`` appended is used.
        label: |@doc:pdf.init.label| Human-readable name
               or label of
               the PDF for a better description, to be used with plots etc.
               Has no programmatical functional purpose as identification. |@docend:pdf.init.label|
            If not given, the label of this PDF with ``" _truncated"`` appended is used.

    Returns:
        A :py:class:`~zfit.pdf.TruncatedPDF` wrapping this PDF.
    """
    from ..models.truncated import TruncatedPDF

    # Resolve ``obs`` first: missing limits then fall back to the explicit ``obs`` if one was
    # given and to the space of this PDF otherwise.
    if obs is None:
        obs = self.space
    if limits is None:
        limits = obs
    if name is None:
        name = self.name + "_truncated"
    if label is None:
        label = self.label + " _truncated"
    if norm is None:
        norm = self.norm
    return TruncatedPDF(pdf=self, obs=obs, limits=limits, extended=extended, norm=norm, name=name, label=label)