feat: Expose fitted parameter values of implicit fits in test statist…

…ic calls (#1554) * Add `return_calculator` kwarg to `pyhf.infer.hypotest` * Add `return_fitted_pars` kwarg to `pyhf.infer.calculators.generate_asimov_data` * Add `return_fitted_pars` kwarg to test statistic functions in `pyhf.infer.test_statistics` * Add `fitted_pars` property to `pyhf.infer.calculators.AsymptoticCalculator` * Add tests for return_fitted_pars options * Add Lars Henkelmann to contributor list
scikit-hep · Oct 13, 2021 · b5bb9e3 · b5bb9e3
1 parent 9358f85
commit b5bb9e3
Show file tree

Hide file tree

Showing 8 changed files with 383 additions and 25 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -140,6 +140,7 @@ Calculators
    :nosignatures:
 
    calculators.generate_asimov_data
+   calculators.HypoTestFitResults
    calculators.AsymptoticTestStatDistribution
    calculators.EmpiricalDistribution
    calculators.AsymptoticCalculator

diff --git a/docs/contributors.rst b/docs/contributors.rst
@@ -25,4 +25,5 @@ Contributors include:
 - Saransh Chopra
 - Sviatoslav Sydorenko
 - Mason Proffitt
+- Lars Henkelmann
 - Aryan Roy
diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
@@ -28,6 +28,7 @@ def hypotest(
     return_tail_probs=False,
     return_expected=False,
     return_expected_set=False,
+    return_calculator=False,
     **kwargs,
 ):
     r"""
@@ -66,9 +67,12 @@ def hypotest(
         return_tail_probs (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
         return_expected (:obj:`bool`): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
         return_expected_set (:obj:`bool`): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
+        return_calculator (:obj:`bool`): Bool for returning calculator.
 
     Returns:
-        Tuple of Floats and lists of Floats:
+        Tuple of Floats and lists of Floats and
+        a :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
+        or :py:class:`~pyhf.infer.calculators.ToyCalculator` instance:
 
             - :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
               the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
@@ -139,6 +143,12 @@ def hypotest(
             referred to as the "Brazil band".
             Only returned when ``return_expected_set`` is ``True``.
 
+            - a calculator: The calculator instance used in the computation of the :math:`p`-values.
+              Either an instance of :py:class:`~pyhf.infer.calculators.AsymptoticCalculator`
+              or :py:class:`~pyhf.infer.calculators.ToyCalculator`,
+              depending on the value of ``calctype``.
+              Only returned when ``return_calculator`` is ``True``.
+
     """
     init_pars = init_pars or pdf.config.suggested_init()
     par_bounds = par_bounds or pdf.config.suggested_bounds()
@@ -188,6 +198,8 @@ def hypotest(
         _returns.append(pvalues_exp_band)
     elif return_expected:
         _returns.append(tb.astensor(pvalues_exp_band[2]))
+    if return_calculator:
+        _returns.append(calc)
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]
 

diff --git a/src/pyhf/infer/calculators.py b/src/pyhf/infer/calculators.py
@@ -12,6 +12,7 @@
 from pyhf.infer import utils
 import tqdm
 
+from dataclasses import dataclass
 import logging
 
 log = logging.getLogger(__name__)
@@ -29,7 +30,9 @@ def __dir__():
     return __all__
 
 
-def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_params):
+def generate_asimov_data(
+    asimov_mu, data, pdf, init_pars, par_bounds, fixed_params, return_fitted_pars=False
+):
     """
     Compute Asimov Dataset (expected yields at best-fit values) for a given POI value.
 
@@ -46,6 +49,14 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para
         >>> pyhf.infer.calculators.generate_asimov_data(mu_test, data, model, None, None, None)
         array([ 60.61229858,  56.52802479, 270.06832542,  48.31545488])
 
+        It is possible to access the Asimov parameters as well:
+
+        >>> pyhf.infer.calculators.generate_asimov_data(
+        ...     mu_test, data, model, None, None, None,
+        ...     return_fitted_pars = True
+        ... )
+        (array([ 60.61229858,  56.52802479, 270.06832542,  48.31545488]), array([1.        , 0.97224597, 0.87553894]))
+
     Args:
         asimov_mu (:obj:`float`): The value for the parameter of interest to be used.
         data (:obj:`tensor`): The observed data.
@@ -56,15 +67,23 @@ def generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds, fixed_para
             The shape should be ``(n, 2)`` for ``n`` model parameters.
         fixed_params (:obj:`tensor` of :obj:`bool`): The flag to set a parameter constant to its starting
             value during minimization.
+        return_fitted_pars (:obj:`bool`): Return the best-fit parameter values for the given ``asimov_mu``.
+
 
     Returns:
-        Tensor: The Asimov dataset.
+        A Tensor or a Tuple of two Tensors:
+
+             - The Asimov dataset.
 
+             - The Asimov parameters. Only returned if ``return_fitted_pars`` is ``True``.
     """
     bestfit_nuisance_asimov = fixed_poi_fit(
         asimov_mu, data, pdf, init_pars, par_bounds, fixed_params
     )
-    return pdf.expected_data(bestfit_nuisance_asimov)
+    asimov_data = pdf.expected_data(bestfit_nuisance_asimov)
+    if return_fitted_pars:
+        return asimov_data, bestfit_nuisance_asimov
+    return asimov_data
 
 
 class AsymptoticTestStatDistribution:
@@ -188,6 +207,21 @@ def expected_value(self, nsigma):
         )
 
 
+@dataclass(frozen=True)
+class HypoTestFitResults:
+    """
+    Fitted model parameters of the fits in
+    :py:meth:`AsymptoticCalculator.teststatistic <pyhf.infer.calculators.AsymptoticCalculator.teststatistic>`
+    """
+
+    # ignore "F821 undefined name 'Tensor'" so as to avoid typing.Any
+    asimov_pars: 'Tensor'  # noqa: F821
+    free_fit_to_data: 'Tensor'  # noqa: F821
+    free_fit_to_asimov: 'Tensor'  # noqa: F821
+    fixed_poi_fit_to_data: 'Tensor'  # noqa: F821
+    fixed_poi_fit_to_asimov: 'Tensor'  # noqa: F821
+
+
 class AsymptoticCalculator:
     """The Asymptotic Calculator."""
 
@@ -251,6 +285,7 @@ def __init__(
         self.test_stat = test_stat
         self.calc_base_dist = calc_base_dist
         self.sqrtqmuA_v = None
+        self.fitted_pars = None
 
     def distributions(self, poi_test):
         r"""
@@ -297,9 +332,13 @@ def distributions(self, poi_test):
         return sb_dist, b_dist
 
     def teststatistic(self, poi_test):
-        """
+        r"""
         Compute the test statistic for the observed data under the studied model.
 
+        The fitted parameters of the five fits that are implicitly ran at every call
+        of this method are afterwards accessible through ``self.fitted_pars``,
+        which is a :py:class:`~pyhf.infer.calculators.HypoTestFitResults` instance.
+
         Example:
 
             >>> import pyhf
@@ -314,6 +353,16 @@ def teststatistic(self, poi_test):
             >>> asymptotic_calculator.teststatistic(mu_test)
             array(0.14043184)
 
+            Access the best-fit parameters afterwards:
+
+            >>> asymptotic_calculator.fitted_pars
+            HypoTestFitResults(asimov_pars=array([0.        , 1.0030482 , 0.96264534]), free_fit_to_data=array([0.        , 1.0030512 , 0.96266961]), free_fit_to_asimov=array([0.        , 1.00304893, 0.96263365]), fixed_poi_fit_to_data=array([1.        , 0.97224597, 0.87553894]), fixed_poi_fit_to_asimov=array([1.        , 0.97276864, 0.87142047]))
+
+            E.g. the :math:`\hat{\mu}` and :math:`\hat{\theta}` fitted to the asimov dataset:
+
+            >>> asymptotic_calculator.fitted_pars.free_fit_to_asimov
+            array([0.        , 1.00304893, 0.96263365])
+
         Args:
             poi_test (:obj:`float` or :obj:`tensor`): The value for the parameter of interest.
 
@@ -325,35 +374,45 @@ def teststatistic(self, poi_test):
 
         teststat_func = utils.get_test_stat(self.test_stat)
 
-        qmu_v = teststat_func(
+        qmu_v, (mubhathat, muhatbhat) = teststat_func(
             poi_test,
             self.data,
             self.pdf,
             self.init_pars,
             self.par_bounds,
             self.fixed_params,
+            return_fitted_pars=True,
         )
         sqrtqmu_v = tensorlib.sqrt(qmu_v)
 
         asimov_mu = 1.0 if self.test_stat == 'q0' else 0.0
 
-        asimov_data = generate_asimov_data(
+        asimov_data, asimov_mubhathat = generate_asimov_data(
             asimov_mu,
             self.data,
             self.pdf,
             self.init_pars,
             self.par_bounds,
             self.fixed_params,
+            return_fitted_pars=True,
         )
-        qmuA_v = teststat_func(
+        qmuA_v, (mubhathat_A, muhatbhat_A) = teststat_func(
             poi_test,
             asimov_data,
             self.pdf,
             self.init_pars,
             self.par_bounds,
             self.fixed_params,
+            return_fitted_pars=True,
         )
         self.sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
+        self.fitted_pars = HypoTestFitResults(
+            asimov_pars=asimov_mubhathat,
+            free_fit_to_data=muhatbhat,
+            free_fit_to_asimov=muhatbhat_A,
+            fixed_poi_fit_to_data=mubhathat,
+            fixed_poi_fit_to_asimov=mubhathat_A,
+        )
 
         if self.test_stat in ["q", "q0"]:  # qmu or q0
             teststat = sqrtqmu_v - self.sqrtqmuA_v