Skip to content

Commit

Permalink
feat: Expose fitted parameter values of implicit fits in test statist…
Browse files Browse the repository at this point in the history
…ic calls (#1554)

* Add `return_calculator` kwarg to `pyhf.infer.hypotest`
* Add `return_fitted_pars` kwarg to `pyhf.infer.calculators.generate_asimov_data`
* Add `return_fitted_pars` kwarg to test statistic functions in `pyhf.infer.test_statistics`
* Add `fitted_pars` property to `pyhf.infer.calculators.AsymptoticCalculator`
* Add tests for return_fitted_pars options
* Add Lars Henkelmann to contributor list
  • Loading branch information
lhenkelm authored Oct 13, 2021
1 parent 9358f85 commit b5bb9e3
Show file tree
Hide file tree
Showing 8 changed files with 383 additions and 25 deletions.
1 change: 1 addition & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ Calculators
:nosignatures:

calculators.generate_asimov_data
calculators.HypoTestFitResults
calculators.AsymptoticTestStatDistribution
calculators.EmpiricalDistribution
calculators.AsymptoticCalculator
Expand Down
1 change: 1 addition & 0 deletions docs/contributors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ Contributors include:
- Saransh Chopra
- Sviatoslav Sydorenko
- Mason Proffitt
- Lars Henkelmann
- Aryan Roy
14 changes: 13 additions & 1 deletion src/pyhf/infer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def hypotest(
return_tail_probs=False,
return_expected=False,
return_expected_set=False,
return_calculator=False,
**kwargs,
):
r"""
Expand Down Expand Up @@ -66,9 +67,12 @@ def hypotest(
return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
return_calculator (:obj:`bool`): Bool for returning calculator.
Returns:
Tuple of Floats and lists of Floats:
Tuple of Floats and lists of Floats and
a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance:
- :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
Expand Down Expand Up @@ -139,6 +143,12 @@ def hypotest(
referred to as the "Brazil band".
Only returned when ``return_expected_set`` is ``True``.
- a calculator: The calculator instance used in the computation of the :math:`p`-values.
Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
or :py:class:`~pyhf.infer.calculators.ToyCalculator`,
depending on the value of ``calctype``.
Only returned when ``return_calculator`` is ``True``.
"""
init_pars = init_pars or pdf.config.suggested_init()
par_bounds = par_bounds or pdf.config.suggested_bounds()
Expand Down Expand Up @@ -188,6 +198,8 @@ def hypotest(
_returns.append(pvalues_exp_band)
elif return_expected:
_returns.append(tb.astensor(pvalues_exp_band[2]))
if return_calculator:
_returns.append(calc)
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]

Expand Down
73 changes: 66 additions & 7 deletions src/pyhf/infer/calculators.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pyhf.infer import utils
import tqdm

from dataclasses import dataclass
import logging

log = logging.getLogger(__name__)
Expand All @@ -29,7 +30,9 @@ def __dir__():
return __all__


def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_params):
def generate_asimov_data(
asimov_mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
):
"""
Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.
Expand All @@ -46,6 +49,14 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para
>>> pyhf.infer.calculators.generate_asimov_data(mu_test, data, model, None, None, None)
array([ 60.61229858, 56.52802479, 270.06832542, 48.31545488])
It is possible to access the Asimov parameters as well:
>>> pyhf.infer.calculators.generate_asimov_data(
... mu_test, data, model, None, None, None,
... return_fitted_pars = True
... )
(array([ 60.61229858, 56.52802479, 270.06832542, 48.31545488]), array([1. , 0.97224597, 0.87553894]))
Args:
asimov_mu (:obj:`float`): The value for the parameter of interest to be used.
data (:obj:`tensor`): The observed data.
Expand All @@ -56,15 +67,23 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para
The shape should be ``(n, 2)`` for ``n`` model parameters.
fixed_params (:obj:`tensor` of :obj:`bool`): The flag to set a parameter constant to its starting
value during minimization.
return_fitted_pars (:obj:`bool`): Return the best-fit parameter values for the given ``asimov_mu``.
Returns:
Tensor: The Asimov dataset.
A Tensor or a Tuple of two Tensors:
- The Asimov dataset.
- The Asimov parameters. Only returned if ``return_fitted_pars`` is ``True``.
"""
bestfit_nuisance_asimov = fixed_poi_fit(
asimov_mu, data, pdf, init_pars, par_bounds, fixed_params
)
return pdf.expected_data(bestfit_nuisance_asimov)
asimov_data = pdf.expected_data(bestfit_nuisance_asimov)
if return_fitted_pars:
return asimov_data, bestfit_nuisance_asimov
return asimov_data


class AsymptoticTestStatDistribution:
Expand Down Expand Up @@ -188,6 +207,21 @@ def expected_value(self, nsigma):
)


@dataclass(frozen=True)
class HypoTestFitResults:
"""
Fitted model parameters of the fits in
:py:meth:`AsymptoticCalculator.teststatistic <pyhf.infer.calculators.AsymptoticCalculator.teststatistic>`
"""

# ignore "F821 undefined name 'Tensor'" so as to avoid typing.Any
asimov_pars: 'Tensor' # noqa: F821
free_fit_to_data: 'Tensor' # noqa: F821
free_fit_to_asimov: 'Tensor' # noqa: F821
fixed_poi_fit_to_data: 'Tensor' # noqa: F821
fixed_poi_fit_to_asimov: 'Tensor' # noqa: F821


class AsymptoticCalculator:
"""The Asymptotic Calculator."""

Expand Down Expand Up @@ -251,6 +285,7 @@ def __init__(
self.test_stat = test_stat
self.calc_base_dist = calc_base_dist
self.sqrtqmuA_v = None
self.fitted_pars = None

def distributions(self, poi_test):
r"""
Expand Down Expand Up @@ -297,9 +332,13 @@ def distributions(self, poi_test):
return sb_dist, b_dist

def teststatistic(self, poi_test):
"""
r"""
Compute the test statistic for the observed data under the studied model.
The fitted parameters of the five fits that are implicitly ran at every call
of this method are afterwards accessible through ``self.fitted_pars``,
which is a :py:class:`~pyhf.infer.calculators.HypoTestFitResults` instance.
Example:
>>> import pyhf
Expand All @@ -314,6 +353,16 @@ def teststatistic(self, poi_test):
>>> asymptotic_calculator.teststatistic(mu_test)
array(0.14043184)
Access the best-fit parameters afterwards:
>>> asymptotic_calculator.fitted_pars
HypoTestFitResults(asimov_pars=array([0. , 1.0030482 , 0.96264534]), free_fit_to_data=array([0. , 1.0030512 , 0.96266961]), free_fit_to_asimov=array([0. , 1.00304893, 0.96263365]), fixed_poi_fit_to_data=array([1. , 0.97224597, 0.87553894]), fixed_poi_fit_to_asimov=array([1. , 0.97276864, 0.87142047]))
E.g. the :math:`\hat{\mu}` and :math:`\hat{\theta}` fitted to the asimov dataset:
>>> asymptotic_calculator.fitted_pars.free_fit_to_asimov
array([0. , 1.00304893, 0.96263365])
Args:
poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
Expand All @@ -325,35 +374,45 @@ def teststatistic(self, poi_test):

teststat_func = utils.get_test_stat(self.test_stat)

qmu_v = teststat_func(
qmu_v, (mubhathat, muhatbhat) = teststat_func(
poi_test,
self.data,
self.pdf,
self.init_pars,
self.par_bounds,
self.fixed_params,
return_fitted_pars=True,
)
sqrtqmu_v = tensorlib.sqrt(qmu_v)

asimov_mu = 1.0 if self.test_stat == 'q0' else 0.0

asimov_data = generate_asimov_data(
asimov_data, asimov_mubhathat = generate_asimov_data(
asimov_mu,
self.data,
self.pdf,
self.init_pars,
self.par_bounds,
self.fixed_params,
return_fitted_pars=True,
)
qmuA_v = teststat_func(
qmuA_v, (mubhathat_A, muhatbhat_A) = teststat_func(
poi_test,
asimov_data,
self.pdf,
self.init_pars,
self.par_bounds,
self.fixed_params,
return_fitted_pars=True,
)
self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
self.fitted_pars = HypoTestFitResults(
asimov_pars=asimov_mubhathat,
free_fit_to_data=muhatbhat,
free_fit_to_asimov=muhatbhat_A,
fixed_poi_fit_to_data=mubhathat,
fixed_poi_fit_to_asimov=mubhathat_A,
)

if self.test_stat in ["q", "q0"]: # qmu or q0
teststat = sqrtqmu_v - self.sqrtqmuA_v
Expand Down
Loading

0 comments on commit b5bb9e3

Please sign in to comment.