Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
BayesianCalculator.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer, Lorenzo Moneta, Gregory Schott, Wouter Verkerke
3 /*************************************************************************
4  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 
12 /** \class RooStats::BayesianCalculator
13  \ingroup Roostats
14 
15 BayesianCalculator is a concrete implementation of IntervalCalculator, providing the computation
16 of a credible interval using a Bayesian method.
17 The class works only for one single parameter of interest and it integrates the likelihood function with the given prior
18 probability density function to compute the posterior probability. The result of the class is a one dimensional interval
19 (class SimpleInterval ), which is obtained from inverting the cumulative posterior distribution.
20 This calculator works then only for model with a single parameter of interest.
21 The model can instead have several nuisance parameters which are integrated (marginalized) in the computation of the posterior function.
22 The integration and normalization of the posterior is computed using numerical integration methods provided by ROOT.
23 See the MCMCCalculator for model with multiple parameters of interest.
24 
25 The interface allows one to construct the class by passing the data set, probability density function for the model, the prior
26 functions and then the parameter of interest to scan. The nuisance parameters can also be passed to be marginalized when
27 computing the posterior. Alternatively, the class can be constructed by passing the data and the ModelConfig containing
28 all the needed information (model pdf, prior pdf, parameter of interest, nuisance parameters, etc..)
29 
30 After configuring the calculator, one only needs to ask GetInterval(), which
31 will return an SimpleInterval object. By default the extrem of the integral are obtained by inverting directly the
32 cumulative posterior distribution. By using the method SetScanOfPosterior(nbins) the interval is then obtained by
33 scanning the posterior function in the given number of points. The first method is in general faster but it requires an
34 integration one extra dimension ( in the poi in addition to the nuisance parameters), therefore in some case it can be
35 less robust.
36 
37 The class can also return the posterior function (method GetPosteriorFunction) or if needed the normalized
38 posterior function (the posterior pdf) (method GetPosteriorPdf). A posterior plot is also obtained using
39 the GetPosteriorPlot method.
40 
41 The class allows to use different integration methods for integrating in (marginalizing) the nuisances and in the poi. All the numerical
42 integration methods of ROOT can be used via the method SetIntegrationType (see more in the documentation of
43 this method).
44 
45 Calculator estimating a credible interval using the Bayesian procedure.
46 The calculator computes given the model the posterior distribution and estimates the
47 credible interval from the given function.
48 */
49 
50 
51 // include other header files
52 
53 #include "RooAbsFunc.h"
54 #include "RooAbsReal.h"
55 #include "RooRealVar.h"
56 #include "RooArgSet.h"
57 #include "RooBrentRootFinder.h"
58 #include "RooFormulaVar.h"
59 #include "RooGenericPdf.h"
60 #include "RooPlot.h"
61 #include "RooProdPdf.h"
62 #include "RooDataSet.h"
63 
64 // include header file of this class
66 #include "RooStats/ModelConfig.h"
67 #include "RooStats/RooStatsUtils.h"
68 
69 #include "Math/IFunction.h"
71 #include "Math/Integrator.h"
72 #include "Math/RootFinder.h"
73 #include "Math/BrentMinimizer1D.h"
74 #include "RooFunctor.h"
75 #include "RooFunctor1DBinding.h"
76 #include "RooTFnBinding.h"
77 #include "RooMsgService.h"
78 
79 #include "TAxis.h"
80 #include "TF1.h"
81 #include "TH1.h"
82 #include "TMath.h"
83 #include "TCanvas.h"
84 
85 #include <map>
86 #include <cmath>
87 
88 //#include "TRandom.h"
89 #include "RConfigure.h"
90 
91 ClassImp(RooStats::BayesianCalculator);
92 
93 using namespace std;
94 
95 namespace RooStats {
96 
97 
98 // first some utility classes and functions
99 
100 #ifdef R__HAS_MATHMORE
101  const ROOT::Math::RootFinder::EType kRootFinderType = ROOT::Math::RootFinder::kGSL_BRENT;
102 #else
103  const ROOT::Math::RootFinder::EType kRootFinderType = ROOT::Math::RootFinder::kBRENT;
104 #endif
105 
106 
107 
108 
109 struct LikelihoodFunction {
110  LikelihoodFunction(RooFunctor & f, RooFunctor * prior = 0, double offset = 0) :
111  fFunc(f), fPrior(prior),
112  fOffset(offset), fMaxL(0) {
113  fFunc.binding().resetNumCall();
114  }
115 
116  void SetPrior(RooFunctor * prior) { fPrior = prior; }
117 
118  double operator() (const double *x ) const {
119  double nll = fFunc(x) - fOffset;
120  double likelihood = std::exp(-nll);
121 
122  if (fPrior) likelihood *= (*fPrior)(x);
123 
124  int nCalls = fFunc.binding().numCall();
125  if (nCalls > 0 && nCalls % 1000 == 0) {
126  ooccoutD((TObject*)0,Eval) << "Likelihood evaluation ncalls = " << nCalls
127  << " x0 " << x[0] << " nll = " << nll+fOffset;
128  if (fPrior) ooccoutD((TObject*)0,Eval) << " prior(x) = " << (*fPrior)(x);
129  ooccoutD((TObject*)0,Eval) << " likelihood " << likelihood
130  << " max Likelihood " << fMaxL << std::endl;
131  }
132 
133  if (likelihood > fMaxL ) {
134  fMaxL = likelihood;
135  if ( likelihood > 1.E10) {
136  ooccoutW((TObject*)0,Eval) << "LikelihoodFunction::() WARNING - Huge likelihood value found for parameters ";
137  for (int i = 0; i < fFunc.nObs(); ++i)
138  ooccoutW((TObject*)0,Eval) << " x[" << i << " ] = " << x[i];
139  ooccoutW((TObject*)0,Eval) << " nll = " << nll << " L = " << likelihood << std::endl;
140  }
141  }
142 
143  return likelihood;
144  }
145 
146  // for the 1D case
147  double operator() (double x) const {
148  // just call the previous method
149  assert(fFunc.nObs() == 1); // check nobs = 1
150  double tmp = x;
151  return (*this)(&tmp);
152  }
153 
154  RooFunctor & fFunc; // functor representing the nll function
155  RooFunctor * fPrior; // functor representing the prior function
156  double fOffset; // offset used to bring the nll in a reasonable range for computing the exponent
157  mutable double fMaxL;
158 };
159 
160 
161 // Posterior CDF Function class
162 // for integral of posterior function in nuisance and POI
163 // 1-Dim function as function of the poi
164 
165 class PosteriorCdfFunction : public ROOT::Math::IGenFunction {
166 
167 public:
168 
169  PosteriorCdfFunction(RooAbsReal & nll, RooArgList & bindParams, RooAbsReal * prior = 0, const char * integType = 0, double nllMinimum = 0) :
170  fFunctor(nll, bindParams, RooArgList() ), // functor
171  fPriorFunc(nullptr),
172  fLikelihood(fFunctor, 0, nllMinimum), // integral of exp(-nll) function
173  fIntegrator(ROOT::Math::IntegratorMultiDim::GetType(integType) ), // integrator
174  fXmin(bindParams.getSize() ), // vector of parameters (min values)
175  fXmax(bindParams.getSize() ), // vector of parameter (max values)
176  fNorm(1.0), fNormErr(0.0), fOffset(0), fMaxPOI(0),
177  fHasNorm(false), fUseOldValues(true), fError(false)
178  {
179 
180  if (prior) {
181  fPriorFunc = std::make_shared<RooFunctor>(*prior, bindParams, RooArgList());
182  fLikelihood.SetPrior(fPriorFunc.get() );
183  }
184 
185  fIntegrator.SetFunction(fLikelihood, bindParams.getSize() );
186 
187  ooccoutD((TObject*)0,NumIntegration) << "PosteriorCdfFunction::Compute integral of posterior in nuisance and poi. "
188  << " nllMinimum is " << nllMinimum << std::endl;
189 
190  std::vector<double> par(bindParams.getSize());
191  for (unsigned int i = 0; i < fXmin.size(); ++i) {
192  RooRealVar & var = (RooRealVar &) bindParams[i];
193  fXmin[i] = var.getMin();
194  fXmax[i] = var.getMax();
195  par[i] = var.getVal();
196  ooccoutD((TObject*)0,NumIntegration) << "PosteriorFunction::Integrate" << var.GetName()
197  << " in interval [ " << fXmin[i] << " , " << fXmax[i] << " ] " << std::endl;
198  }
199 
200  fIntegrator.Options().Print(ooccoutD((TObject*)0,NumIntegration));
201 
202  // store max POI value because it will be changed when evaluating the function
203  fMaxPOI = fXmax[0];
204 
205  // compute first the normalization with the poi
206  fNorm = (*this)( fMaxPOI );
207  if (fError) ooccoutE((TObject*)0,NumIntegration) << "PosteriorFunction::Error computing normalization - norm = " << fNorm << std::endl;
208  fHasNorm = true;
209  fNormCdfValues.insert(std::make_pair(fXmin[0], 0) );
210  fNormCdfValues.insert(std::make_pair(fXmax[0], 1.0) );
211 
212  }
213 
214  // copy constructor (needed for Cloning the object)
215  // need special treatment because integrator
216  // has no copy constructor
217  PosteriorCdfFunction(const PosteriorCdfFunction & rhs) :
218  ROOT::Math::IGenFunction(),
219  fFunctor(rhs.fFunctor),
220  //fPriorFunc(std::shared_ptr<RooFunctor>((RooFunctor*)0)),
221  fPriorFunc(rhs.fPriorFunc),
222  fLikelihood(fFunctor, fPriorFunc.get(), rhs.fLikelihood.fOffset),
223  fIntegrator(ROOT::Math::IntegratorMultiDim::GetType( rhs.fIntegrator.Name().c_str() ) ), // integrator
224  fXmin( rhs.fXmin),
225  fXmax( rhs.fXmax),
226  fNorm( rhs.fNorm),
227  fNormErr( rhs.fNormErr),
228  fOffset(rhs.fOffset),
229  fMaxPOI(rhs.fMaxPOI),
230  fHasNorm(rhs.fHasNorm),
231  fUseOldValues(rhs.fUseOldValues),
232  fError(rhs.fError),
233  fNormCdfValues(rhs.fNormCdfValues)
234  {
235  fIntegrator.SetFunction(fLikelihood, fXmin.size() );
236  // need special treatment for the smart pointer
237  // if (rhs.fPriorFunc.get() ) {
238  // fPriorFunc = std::shared_ptr<RooFunctor>(new RooFunctor(*(rhs.fPriorFunc) ) );
239  // fLikelihood.SetPrior( fPriorFunc.get() );
240  // }
241  }
242 
243 
244  bool HasError() const { return fError; }
245 
246 
247  ROOT::Math::IGenFunction * Clone() const {
248  ooccoutD((TObject*)0,NumIntegration) << " cloning function .........." << std::endl;
249  return new PosteriorCdfFunction(*this);
250  }
251 
252  // offset value for computing the root
253  void SetOffset(double offset) { fOffset = offset; }
254 
255 private:
256 
257  // make assignment operator private
258  PosteriorCdfFunction& operator=(const PosteriorCdfFunction &) {
259  return *this;
260  }
261 
262  double DoEval (double x) const {
263 
264  // evaluate cdf at poi value x by integrating poi from [xmin,x] and all the nuisances
265  fXmax[0] = x;
266  if (x <= fXmin[0] ) return -fOffset;
267  // could also avoid a function evaluation at maximum
268  if (x >= fMaxPOI && fHasNorm) return 1. - fOffset; // cdf is bound to these values
269 
270  // computes the integral using a previous cdf estimate
271  double normcdf0 = 0;
272  if (fHasNorm && fUseOldValues) {
273  // look in the map of the stored cdf values the closes one
274  std::map<double,double>::iterator itr = fNormCdfValues.upper_bound(x);
275  --itr; // upper bound returns a position 1 up of the value we want
276  if (itr != fNormCdfValues.end() ) {
277  fXmin[0] = itr->first;
278  normcdf0 = itr->second;
279  // ooccoutD((TObject*)0,NumIntegration) << "PosteriorCdfFunction: computing integral between in poi interval : "
280  // << fXmin[0] << " - " << fXmax[0] << std::endl;
281  }
282  }
283 
284  fFunctor.binding().resetNumCall(); // reset number of calls for debug
285 
286  double cdf = fIntegrator.Integral(&fXmin[0],&fXmax[0]);
287  double error = fIntegrator.Error();
288  double normcdf = cdf/fNorm; // normalize the cdf
289 
290  ooccoutD((TObject*)0,NumIntegration) << "PosteriorCdfFunction: poi = [" << fXmin[0] << " , "
291  << fXmax[0] << "] integral = " << cdf << " +/- " << error
292  << " norm-integ = " << normcdf << " cdf(x) = " << normcdf+normcdf0
293  << " ncalls = " << fFunctor.binding().numCall() << std::endl;
294 
295  if (TMath::IsNaN(cdf) || cdf > std::numeric_limits<double>::max()) {
296  ooccoutE((TObject*)0,NumIntegration) << "PosteriorFunction::Error computing integral - cdf = "
297  << cdf << std::endl;
298  fError = true;
299  }
300 
301  if (cdf != 0 && error/cdf > 0.2 )
302  oocoutW((TObject*)0,NumIntegration) << "PosteriorCdfFunction: integration error is larger than 20 % x0 = " << fXmin[0]
303  << " x = " << x << " cdf(x) = " << cdf << " +/- " << error << std::endl;
304 
305  if (!fHasNorm) {
306  oocoutI((TObject*)0,NumIntegration) << "PosteriorCdfFunction - integral of posterior = "
307  << cdf << " +/- " << error << std::endl;
308  fNormErr = error;
309  return cdf;
310  }
311 
312  normcdf += normcdf0;
313 
314  // store values in the map
315  if (fUseOldValues) {
316  fNormCdfValues.insert(std::make_pair(x, normcdf) );
317  }
318 
319  double errnorm = sqrt( error*error + normcdf*normcdf * fNormErr * fNormErr )/fNorm;
320  if (normcdf > 1. + 3 * errnorm) {
321  oocoutW((TObject*)0,NumIntegration) << "PosteriorCdfFunction: normalized cdf values is larger than 1"
322  << " x = " << x << " normcdf(x) = " << normcdf << " +/- " << error/fNorm << std::endl;
323  }
324 
325  return normcdf - fOffset; // apply an offset (for finding the roots)
326  }
327 
328  mutable RooFunctor fFunctor; // functor binding nll
329  mutable std::shared_ptr<RooFunctor> fPriorFunc; // functor binding the prior
330  LikelihoodFunction fLikelihood; // likelihood function
331  mutable ROOT::Math::IntegratorMultiDim fIntegrator; // integrator (mutable because Integral() is not const
332  mutable std::vector<double> fXmin; // min value of parameters (poi+nuis) -
333  mutable std::vector<double> fXmax; // max value of parameters (poi+nuis) - max poi changes so it is mutable
334  double fNorm; // normalization value (computed in ctor)
335  mutable double fNormErr; // normalization error value (computed in ctor)
336  double fOffset; // offset for computing the root
337  double fMaxPOI; // maximum value of POI
338  bool fHasNorm; // flag to control first call to the function
339  bool fUseOldValues; // use old cdf values
340  mutable bool fError; // flag to indicate if a numerical evaluation error occurred
341  mutable std::map<double,double> fNormCdfValues;
342 };
343 
344 //__________________________________________________________________
345 // Posterior Function class
346 // 1-Dim function as function of the poi
347 // and it integrated all the nuisance parameters
348 
349 class PosteriorFunction : public ROOT::Math::IGenFunction {
350 
351 public:
352 
353 
354  PosteriorFunction(RooAbsReal & nll, RooRealVar & poi, RooArgList & nuisParams, RooAbsReal * prior = 0, const char * integType = 0, double
355  norm = 1.0, double nllOffset = 0, int niter = 0) :
356  fFunctor(nll, nuisParams, RooArgList() ),
357  fPriorFunc(nullptr),
358  fLikelihood(fFunctor, 0, nllOffset),
359  fPoi(&poi),
360  fXmin(nuisParams.getSize() ),
361  fXmax(nuisParams.getSize() ),
362  fNorm(norm),
363  fError(0)
364  {
365 
366  if (prior) {
367  fPriorFunc = std::make_shared<RooFunctor>(*prior, nuisParams, RooArgList());
368  fLikelihood.SetPrior(fPriorFunc.get() );
369  }
370 
371  ooccoutD((TObject*)0,NumIntegration) << "PosteriorFunction::Evaluate the posterior function by integrating the nuisances: " << std::endl;
372  for (unsigned int i = 0; i < fXmin.size(); ++i) {
373  RooRealVar & var = (RooRealVar &) nuisParams[i];
374  fXmin[i] = var.getMin();
375  fXmax[i] = var.getMax();
376  ooccoutD((TObject*)0,NumIntegration) << "PosteriorFunction::Integrate " << var.GetName()
377  << " in interval [" << fXmin[i] << " , " << fXmax[i] << " ] " << std::endl;
378  }
379  if (fXmin.size() == 1) { // 1D case
380  fIntegratorOneDim.reset( new ROOT::Math::Integrator(ROOT::Math::IntegratorOneDim::GetType(integType) ) );
381 
382  fIntegratorOneDim->SetFunction(fLikelihood);
383  // interested only in relative tolerance
384  //fIntegratorOneDim->SetAbsTolerance(1.E-300);
385  fIntegratorOneDim->Options().Print(ooccoutD((TObject*)0,NumIntegration) );
386  }
387  else if (fXmin.size() > 1) { // multiDim case
388  fIntegratorMultiDim.reset(new ROOT::Math::IntegratorMultiDim(ROOT::Math::IntegratorMultiDim::GetType(integType) ) );
389  fIntegratorMultiDim->SetFunction(fLikelihood, fXmin.size());
390  ROOT::Math::IntegratorMultiDimOptions opt = fIntegratorMultiDim->Options();
391  if (niter > 0) {
392  opt.SetNCalls(niter);
393  fIntegratorMultiDim->SetOptions(opt);
394  }
395  //fIntegratorMultiDim->SetAbsTolerance(1.E-300);
396  // print the options
397  opt.Print(ooccoutD((TObject*)0,NumIntegration) );
398  }
399  }
400 
401 
402  ROOT::Math::IGenFunction * Clone() const {
403  assert(1);
404  return 0; // cannot clone this function for integrator
405  }
406 
407  double Error() const { return fError;}
408 
409 
410 private:
411  double DoEval (double x) const {
412 
413  // evaluate posterior function at a poi value x by integrating all nuisance parameters
414 
415  fPoi->setVal(x);
416  fFunctor.binding().resetNumCall(); // reset number of calls for debug
417 
418  double f = 0;
419  double error = 0;
420  if (fXmin.size() == 1) { // 1D case
421  f = fIntegratorOneDim->Integral(fXmin[0],fXmax[0]);
422  error = fIntegratorOneDim->Error();
423  }
424  else if (fXmin.size() > 1) { // multi-dim case
425  f = fIntegratorMultiDim->Integral(&fXmin[0],&fXmax[0]);
426  error = fIntegratorMultiDim->Error();
427  } else {
428  // no integration to be done
429  f = fLikelihood(x);
430  }
431 
432  // debug
433  ooccoutD((TObject*)0,NumIntegration) << "PosteriorFunction: POI value = "
434  << x << "\tf(x) = " << f << " +/- " << error
435  << " norm-f(x) = " << f/fNorm
436  << " ncalls = " << fFunctor.binding().numCall() << std::endl;
437 
438 
439 
440 
441  if (f != 0 && error/f > 0.2 )
442  ooccoutW((TObject*)0,NumIntegration) << "PosteriorFunction::DoEval - Error from integration in "
443  << fXmin.size() << " Dim is larger than 20 % "
444  << "x = " << x << " p(x) = " << f << " +/- " << error << std::endl;
445 
446  fError = error / fNorm;
447  return f / fNorm;
448  }
449 
450  mutable RooFunctor fFunctor;
451  mutable std::shared_ptr<RooFunctor> fPriorFunc; // functor binding the prior
452  LikelihoodFunction fLikelihood;
453  RooRealVar * fPoi;
454  std::unique_ptr<ROOT::Math::Integrator> fIntegratorOneDim;
455  std::unique_ptr<ROOT::Math::IntegratorMultiDim> fIntegratorMultiDim;
456  std::vector<double> fXmin;
457  std::vector<double> fXmax;
458  double fNorm;
459  mutable double fError;
460 };
461 
462 ////////////////////////////////////////////////////////////////////////////////
463 /// Posterior function obtaining sampling toy MC for the nuisance according to their pdf
464 
465 class PosteriorFunctionFromToyMC : public ROOT::Math::IGenFunction {
466 
467 public:
468 
469 
470  PosteriorFunctionFromToyMC(RooAbsReal & nll, RooAbsPdf & pdf, RooRealVar & poi, RooArgList & nuisParams, RooAbsReal * prior = 0, double
471  nllOffset = 0, int niter = 0, bool redoToys = true ) :
472  fFunctor(nll, nuisParams, RooArgList() ),
473  fPriorFunc(nullptr),
474  fLikelihood(fFunctor, 0, nllOffset),
475  fPdf(&pdf),
476  fPoi(&poi),
477  fNuisParams(nuisParams),
478  fGenParams(0),
479  fNumIterations(niter),
480  fError(-1),
481  fRedoToys(redoToys)
482  {
483  if (niter == 0) fNumIterations = 100; // default value
484 
485  if (prior) {
486  fPriorFunc = std::make_shared<RooFunctor>(*prior, nuisParams, RooArgList());
487  fLikelihood.SetPrior(fPriorFunc.get() );
488  }
489 
490  ooccoutI((TObject*)0,InputArguments) << "PosteriorFunctionFromToyMC::Evaluate the posterior function by randomizing the nuisances: niter " << fNumIterations << std::endl;
491 
492  ooccoutI((TObject*)0,InputArguments) << "PosteriorFunctionFromToyMC::Pdf used for randomizing the nuisance is " << fPdf->GetName() << std::endl;
493  // check that pdf contains the nuisance
494  RooArgSet * vars = fPdf->getVariables();
495  for (int i = 0; i < fNuisParams.getSize(); ++i) {
496  if (!vars->find( fNuisParams[i].GetName() ) ) {
497  ooccoutW((TObject*)0,InputArguments) << "Nuisance parameter " << fNuisParams[i].GetName()
498  << " is not part of sampling pdf. "
499  << "they will be treated as constant " << std::endl;
500  }
501  }
502  delete vars;
503 
504  if (!fRedoToys) {
505  ooccoutI((TObject*)0,InputArguments) << "PosteriorFunctionFromToyMC::Generate nuisance toys only one time (for all POI points)" << std::endl;
506  GenerateToys();
507  }
508  }
509 
510  virtual ~PosteriorFunctionFromToyMC() { if (fGenParams) delete fGenParams; }
511 
512  // generate first n-samples of the nuisance parameters
513  void GenerateToys() const {
514  if (fGenParams) delete fGenParams;
515  fGenParams = fPdf->generate(fNuisParams, fNumIterations);
516  if(fGenParams==0) {
517  ooccoutE((TObject*)0,InputArguments) << "PosteriorFunctionFromToyMC - failed to generate nuisance parameters" << std::endl;
518  }
519  }
520 
521  double Error() const { return fError;}
522 
523  ROOT::Math::IGenFunction * Clone() const {
524  // use default copy constructor
525  //return new PosteriorFunctionFromToyMC(*this);
526  // clone not implemented
527  assert(1);
528  return 0;
529  }
530 
531 private:
532  // evaluate the posterior at the poi value x
533  double DoEval( double x) const {
534 
535  int npar = fNuisParams.getSize();
536  assert (npar > 0);
537 
538 
539  // generate the toys
540  if (fRedoToys) GenerateToys();
541  if (!fGenParams) return 0;
542 
543  // evaluate posterior function at a poi value x by integrating all nuisance parameters
544 
545  fPoi->setVal(x);
546 
547  // loop over all of the generate data
548  double sum = 0;
549  double sum2 = 0;
550 
551  for(int iter=0; iter<fNumIterations; ++iter) {
552 
553  // get the set of generated parameters and set the nuisance parameters to the generated values
554  std::vector<double> p(npar);
555  for (int i = 0; i < npar; ++i) {
556  const RooArgSet* genset=fGenParams->get(iter);
557  RooAbsArg * arg = genset->find( fNuisParams[i].GetName() );
558  RooRealVar * var = dynamic_cast<RooRealVar*>(arg);
559  assert(var != 0);
560  p[i] = var->getVal();
561  ((RooRealVar &) fNuisParams[i]).setVal(p[i]);
562  }
563 
564  // evaluate now the likelihood function
565  double fval = fLikelihood( &p.front() );
566 
567  // likelihood already must contained the pdf we have sampled
568  // so we must divided by it. The value must be normalized on all
569  // other parameters
570  RooArgSet arg(fNuisParams);
571  double nuisPdfVal = fPdf->getVal(&arg);
572  fval /= nuisPdfVal;
573 
574 
575  if( fval > std::numeric_limits<double>::max() ) {
576  ooccoutE((TObject*)0,Eval) << "BayesianCalculator::EvalPosteriorFunctionFromToy : "
577  << "Likelihood evaluates to infinity " << std::endl;
578  ooccoutE((TObject*)0,Eval) << "poi value = " << x << std::endl;
579  ooccoutE((TObject*)0,Eval) << "Nuisance parameter values : ";
580  for (int i = 0; i < npar; ++i)
581  ooccoutE((TObject*)0,Eval) << fNuisParams[i].GetName() << " = " << p[i] << " ";
582  ooccoutE((TObject*)0,Eval) << " - return 0 " << std::endl;
583 
584  fError = 1.E30;
585  return 0;
586  }
587  if( TMath::IsNaN(fval) ) {
588  ooccoutE((TObject*)0,Eval) << "BayesianCalculator::EvalPosteriorFunctionFromToy : "
589  << "Likelihood is a NaN " << std::endl;
590  ooccoutE((TObject*)0,Eval) << "poi value = " << x << std::endl;
591  ooccoutE((TObject*)0,Eval) << "Nuisance parameter values : ";
592  for (int i = 0; i < npar; ++i)
593  ooccoutE((TObject*)0,Eval) << fNuisParams[i].GetName() << " = " << p[i] << " ";
594  ooccoutE((TObject*)0,Eval) << " - return 0 " << std::endl;
595  fError = 1.E30;
596  return 0;
597  }
598 
599 
600 
601  sum += fval;
602  sum2 += fval*fval;
603  }
604 
605  // compute the average and variance
606  double val = sum/double(fNumIterations);
607  double dval2 = std::max( sum2/double(fNumIterations) - val*val, 0.0);
608  fError = std::sqrt( dval2 / fNumIterations);
609 
610  // debug
611  ooccoutD((TObject*)0,NumIntegration) << "PosteriorFunctionFromToyMC: POI value = "
612  << x << "\tp(x) = " << val << " +/- " << fError << std::endl;
613 
614 
615  if (val != 0 && fError/val > 0.2 ) {
616  ooccoutW((TObject*)0,NumIntegration) << "PosteriorFunctionFromToyMC::DoEval"
617  << " - Error in estimating posterior is larger than 20% ! "
618  << "x = " << x << " p(x) = " << val << " +/- " << fError << std::endl;
619  }
620 
621 
622  return val;
623  }
624 
625  mutable RooFunctor fFunctor;
626  mutable std::shared_ptr<RooFunctor> fPriorFunc; // functor binding the prior
627  LikelihoodFunction fLikelihood;
628  mutable RooAbsPdf * fPdf;
629  RooRealVar * fPoi;
630  RooArgList fNuisParams;
631  mutable RooDataSet * fGenParams;
632  int fNumIterations;
633  mutable double fError;
634  bool fRedoToys; // do toys every iteration
635 
636 };
637 
638 ////////////////////////////////////////////////////////////////////////////////
639 // Implementation of BayesianCalculator
640 ////////////////////////////////////////////////////////////////////////////////
641 
642 ////////////////////////////////////////////////////////////////////////////////
643 /// default constructor
644 
645 BayesianCalculator::BayesianCalculator() :
646  fData(0),
647  fPdf(0),
648  fPriorPdf(0),
649  fNuisancePdf(0),
650  fProductPdf (0), fLogLike(0), fLikelihood (0), fIntegratedLikelihood (0), fPosteriorPdf(0),
651  fPosteriorFunction(0), fApproxPosterior(0),
652  fLower(0), fUpper(0),
653  fNLLMin(0),
654  fSize(0.05), fLeftSideFraction(0.5),
655  fBrfPrecision(0.00005),
656  fNScanBins(-1),
657  fNumIterations(0),
658  fValidInterval(false)
659 {
660 }
661 
662 ////////////////////////////////////////////////////////////////////////////////
663 /// Constructor from data set, model pdf, parameter of interests and prior pdf
664 /// If nuisance parameters are given they will be integrated according either to the prior or
665 /// their constraint term included in the model
666 
667 BayesianCalculator::BayesianCalculator( /* const char* name, const char* title, */
668  RooAbsData& data,
669  RooAbsPdf& pdf,
670  const RooArgSet& POI,
671  RooAbsPdf& priorPdf,
672  const RooArgSet* nuisanceParameters ) :
673  fData(&data),
674  fPdf(&pdf),
675  fPOI(POI),
676  fPriorPdf(&priorPdf),
677  fNuisancePdf(0),
678  fProductPdf (0), fLogLike(0), fLikelihood (0), fIntegratedLikelihood (0), fPosteriorPdf(0),
679  fPosteriorFunction(0), fApproxPosterior(0),
680  fLower(0), fUpper(0),
681  fNLLMin(0),
682  fSize(0.05), fLeftSideFraction(0.5),
683  fBrfPrecision(0.00005),
684  fNScanBins(-1),
685  fNumIterations(0),
686  fValidInterval(false)
687 {
688 
689  if (nuisanceParameters) fNuisanceParameters.add(*nuisanceParameters);
690  // remove constant nuisance parameters
691  RemoveConstantParameters(&fNuisanceParameters);
692 }
693 
694 ////////////////////////////////////////////////////////////////////////////////
695 /// Constructor from a data set and a ModelConfig
696 /// model pdf, poi and nuisances will be taken from the ModelConfig
697 
698 BayesianCalculator::BayesianCalculator( RooAbsData& data,
699  ModelConfig & model) :
700  fData(&data),
701  fPdf(model.GetPdf()),
702  fPriorPdf( model.GetPriorPdf()),
703  fNuisancePdf(0),
704  fProductPdf (0), fLogLike(0), fLikelihood (0), fIntegratedLikelihood (0), fPosteriorPdf(0),
705  fPosteriorFunction(0), fApproxPosterior(0),
706  fLower(0), fUpper(0),
707  fNLLMin(0),
708  fSize(0.05), fLeftSideFraction(0.5),
709  fBrfPrecision(0.00005),
710  fNScanBins(-1),
711  fNumIterations(0),
712  fValidInterval(false)
713 {
714  SetModel(model);
715 }
716 
717 
718 BayesianCalculator::~BayesianCalculator()
719 {
720  // destructor
721  ClearAll();
722 }
723 
724 ////////////////////////////////////////////////////////////////////////////////
725 /// clear all cached pdf objects
726 
727 void BayesianCalculator::ClearAll() const {
728  if (fProductPdf) delete fProductPdf;
729  if (fLogLike) delete fLogLike;
730  if (fLikelihood) delete fLikelihood;
731  if (fIntegratedLikelihood) delete fIntegratedLikelihood;
732  if (fPosteriorPdf) delete fPosteriorPdf;
733  if (fPosteriorFunction) delete fPosteriorFunction;
734  if (fApproxPosterior) delete fApproxPosterior;
735  fPosteriorPdf = 0;
736  fPosteriorFunction = 0;
737  fProductPdf = 0;
738  fLogLike = 0;
739  fLikelihood = 0;
740  fIntegratedLikelihood = 0;
741  fLower = 0;
742  fUpper = 0;
743  fNLLMin = 0;
744  fValidInterval = false;
745 }
746 
747 ////////////////////////////////////////////////////////////////////////////////
748 /// set the model to use
749 /// The model pdf, prior pdf, parameter of interest and nuisances
750 /// will be taken according to the model
751 
752 void BayesianCalculator::SetModel(const ModelConfig & model) {
753 
754  fPdf = model.GetPdf();
755  fPriorPdf = model.GetPriorPdf();
756  // assignment operator = does not do a real copy the sets (must use add method)
757  fPOI.removeAll();
758  fNuisanceParameters.removeAll();
759  fConditionalObs.removeAll();
760  fGlobalObs.removeAll();
761  if (model.GetParametersOfInterest()) fPOI.add( *(model.GetParametersOfInterest()) );
762  if (model.GetNuisanceParameters()) fNuisanceParameters.add( *(model.GetNuisanceParameters() ) );
763  if (model.GetConditionalObservables()) fConditionalObs.add( *(model.GetConditionalObservables() ) );
764  if (model.GetGlobalObservables()) fGlobalObs.add( *(model.GetGlobalObservables() ) );
765  // remove constant nuisance parameters
766  RemoveConstantParameters(&fNuisanceParameters);
767 
768  // invalidate the cached pointers
769  ClearAll();
770 }
771 
772 ////////////////////////////////////////////////////////////////////////////////
773 /// Build and return the posterior function (not normalized) as a RooAbsReal
774 /// the posterior is obtained from the product of the likelihood function and the
775 /// prior pdf which is then integrated in the nuisance parameters (if existing).
776 /// A prior function for the nuisance can be specified either in the prior pdf object
777 /// or in the model itself. If no prior nuisance is specified, but prior parameters are then
778 /// the integration is performed assuming a flat prior for the nuisance parameters.
779 ///
780 /// NOTE: the return object is managed by the BayesianCalculator class, users do not need to delete it,
781 /// but the object will be deleted when the BayesiabCalculator object is deleted
782 
783 RooAbsReal* BayesianCalculator::GetPosteriorFunction() const
784 {
785 
786  if (fIntegratedLikelihood) return fIntegratedLikelihood;
787  if (fLikelihood) return fLikelihood;
788 
789  // run some sanity checks
790  if (!fPdf ) {
791  coutE(InputArguments) << "BayesianCalculator::GetPosteriorPdf - missing pdf model" << std::endl;
792  return 0;
793  }
794  if (fPOI.getSize() == 0) {
795  coutE(InputArguments) << "BayesianCalculator::GetPosteriorPdf - missing parameter of interest" << std::endl;
796  return 0;
797  }
798  if (fPOI.getSize() > 1) {
799  coutE(InputArguments) << "BayesianCalculator::GetPosteriorPdf - current implementation works only on 1D intervals" << std::endl;
800  return 0;
801  }
802 
803 
804  RooArgSet* constrainedParams = fPdf->getParameters(*fData);
805  // remove the constant parameters
806  RemoveConstantParameters(constrainedParams);
807 
808  //constrainedParams->Print("V");
809 
810  // use RooFit::Constrain() to be sure constraints terms are taken into account
811  fLogLike = fPdf->createNLL(*fData, RooFit::Constrain(*constrainedParams), RooFit::ConditionalObservables(fConditionalObs), RooFit::GlobalObservables(fGlobalObs) );
812 
813 
814 
815  ccoutD(Eval) << "BayesianCalculator::GetPosteriorFunction : "
816  << " pdf value " << fPdf->getVal()
817  << " neglogLikelihood = " << fLogLike->getVal() << std::endl;
818 
819  if (fPriorPdf)
820  ccoutD(Eval) << "\t\t\t priorPOI value " << fPriorPdf->getVal() << std::endl;
821 
822  // check that likelihood evaluation is not infinity
823  double nllVal = fLogLike->getVal();
824  if ( nllVal > std::numeric_limits<double>::max() ) {
825  coutE(Eval) << "BayesianCalculator::GetPosteriorFunction : "
826  << " Negative log likelihood evaluates to infinity " << std::endl
827  << " Non-const Parameter values : ";
828  RooArgList p(*constrainedParams);
829  for (int i = 0; i < p.getSize(); ++i) {
830  RooRealVar * v = dynamic_cast<RooRealVar *>(&p[i] );
831  if (v!=0) ccoutE(Eval) << v->GetName() << " = " << v->getVal() << " ";
832  }
833  ccoutE(Eval) << std::endl;
834  ccoutE(Eval) << "-- Perform a full likelihood fit of the model before or set more reasonable parameter values"
835  << std::endl;
836  coutE(Eval) << "BayesianCalculator::GetPosteriorFunction : " << " cannot compute posterior function " << std::endl;
837  return 0;
838  }
839 
840 
841 
842  // need do find minimum of log-likelihood in the range to shift function
843  // to avoid numerical errors when we compute the likelihood (overflows in the exponent)
844  // N.B.: this works for only 1 parameter of interest otherwise Minuit should be used for finding the minimum
845  RooFunctor * nllFunc = fLogLike->functor(fPOI);
846  assert(nllFunc);
847  ROOT::Math::Functor1D wnllFunc(*nllFunc);
848  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.first() );
849  assert(poi);
850 
851  // try to reduce some error messages
852  //Bool_t silentMode = (RooMsgService::instance().globalKillBelow() >= RooFit::ERROR || RooMsgService::instance().silentMode()) ;
853  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CountErrors);
854 
855 
856  coutI(Eval) << "BayesianCalculator::GetPosteriorFunction : "
857  << " nll value " << nllVal << " poi value = " << poi->getVal() << std::endl;
858 
859 
860  ROOT::Math::BrentMinimizer1D minim;
861  minim.SetFunction(wnllFunc,poi->getMin(),poi->getMax() );
862  bool ret = minim.Minimize(100,1.E-3,1.E-3);
863  fNLLMin = 0;
864  if (ret) fNLLMin = minim.FValMinimum();
865 
866  coutI(Eval) << "BayesianCalculator::GetPosteriorFunction : minimum of NLL vs POI for POI = "
867  << poi->getVal() << " min NLL = " << fNLLMin << std::endl;
868 
869  delete nllFunc;
870 
871  delete constrainedParams;
872 
873 
874  if ( fNuisanceParameters.getSize() == 0 || fIntegrationType.Contains("ROOFIT") ) {
875 
876  ccoutD(Eval) << "BayesianCalculator::GetPosteriorFunction : use ROOFIT integration "
877  << std::endl;
878 
879 #ifdef DOLATER // (not clear why this does not work)
880  // need to make in this case a likelihood from the nll and make the product with the prior
881  TString likeName = TString("likelihood_times_prior_") + TString(fPriorPdf->GetName());
882  TString formula;
883  formula.Form("exp(-@0+%f+log(@1))",fNLLMin);
884  fLikelihood = new RooFormulaVar(likeName,formula,RooArgList(*fLogLike,*fPriorPdf));
885 #else
886  // here use RooProdPdf (not very nice) but working
887 
888  if (fLogLike) delete fLogLike;
889  if (fProductPdf) {
890  delete fProductPdf;
891  fProductPdf = 0;
892  }
893 
894  // // create a unique name for the product pdf
895  RooAbsPdf * pdfAndPrior = fPdf;
896  if (fPriorPdf) {
897  TString prodName = TString("product_") + TString(fPdf->GetName()) + TString("_") + TString(fPriorPdf->GetName() );
898  // save this as data member since it needs to be deleted afterwards
899  fProductPdf = new RooProdPdf(prodName,"",RooArgList(*fPdf,*fPriorPdf));
900  pdfAndPrior = fProductPdf;
901  }
902 
903  RooArgSet* constrParams = fPdf->getParameters(*fData);
904  // remove the constant parameters
905  RemoveConstantParameters(constrParams);
906  fLogLike = pdfAndPrior->createNLL(*fData, RooFit::Constrain(*constrParams),RooFit::ConditionalObservables(fConditionalObs),RooFit::GlobalObservables(fGlobalObs) );
907  delete constrParams;
908 
909  TString likeName = TString("likelihood_times_prior_") + TString(pdfAndPrior->GetName());
910  TString formula;
911  formula.Form("exp(-@0+%f)",fNLLMin);
912  fLikelihood = new RooFormulaVar(likeName,formula,RooArgList(*fLogLike));
913 #endif
914 
915 
916  // if no nuisance parameter we can just return the likelihood function
917  if (fNuisanceParameters.getSize() == 0) {
918  fIntegratedLikelihood = fLikelihood;
919  fLikelihood = 0;
920  }
921  else
922  // case of using RooFit for the integration
923  fIntegratedLikelihood = fLikelihood->createIntegral(fNuisanceParameters);
924 
925 
926  }
927 
928  else if ( fIntegrationType.Contains("TOYMC") ) {
929  // compute the posterior as expectation values of the likelihood function
930  // sampling on the nuisance parameters
931 
932  RooArgList nuisParams(fNuisanceParameters);
933 
934  bool doToysEveryIteration = true;
935  // if type is 1-TOYMC or TOYMC-1
936  if ( fIntegrationType.Contains("1") || fIntegrationType.Contains("ONE") ) doToysEveryIteration = false;
937 
938  RooAbsPdf * samplingPdf = (fNuisancePdf) ? fNuisancePdf : fPdf;
939  if (!fNuisancePdf) {
940  ccoutI(Eval) << "BayesianCalculator::GetPosteriorFunction : no nuisance pdf is provided, try using global pdf (this will be slower)"
941  << std::endl;
942  }
943  fPosteriorFunction = new PosteriorFunctionFromToyMC(*fLogLike, *samplingPdf, *poi, nuisParams, fPriorPdf, fNLLMin,
944  fNumIterations, doToysEveryIteration );
945 
946  TString name = "toyposteriorfunction_from_";
947  name += fLogLike->GetName();
948  fIntegratedLikelihood = new RooFunctor1DBinding(name,name,*fPosteriorFunction,*poi);
949 
950  // need to scan likelihood in this case
951  if (fNScanBins <= 0) fNScanBins = 100;
952 
953  }
954 
955  else {
956 
957  // use ROOT integration method if there are nuisance parameters
958 
959  RooArgList nuisParams(fNuisanceParameters);
960  fPosteriorFunction = new PosteriorFunction(*fLogLike, *poi, nuisParams, fPriorPdf, fIntegrationType, 1., fNLLMin, fNumIterations );
961 
962  TString name = "posteriorfunction_from_";
963  name += fLogLike->GetName();
964  fIntegratedLikelihood = new RooFunctor1DBinding(name,name,*fPosteriorFunction,*poi);
965 
966  }
967 
968 
969  if (RooAbsReal::numEvalErrors() > 0)
970  coutW(Eval) << "BayesianCalculator::GetPosteriorFunction : " << RooAbsReal::numEvalErrors() << " errors reported in evaluating log-likelihood function "
971  << std::endl;
972  RooAbsReal::clearEvalErrorLog();
973  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors);
974 
975  return fIntegratedLikelihood;
976 
977 }
978 
979 ////////////////////////////////////////////////////////////////////////////////
980 /// Build and return the posterior pdf (i.e posterior function normalized to all range of poi)
981 /// Note that an extra integration in the POI is required for the normalization
982 /// NOTE: user must delete the returned object
983 
984 RooAbsPdf* BayesianCalculator::GetPosteriorPdf() const
985 {
986 
987  RooAbsReal * plike = GetPosteriorFunction();
988  if (!plike) return 0;
989 
990 
991  // create a unique name on the posterior from the names of the components
992  TString posteriorName = this->GetName() + TString("_posteriorPdf_") + plike->GetName();
993 
994  RooAbsPdf * posteriorPdf = new RooGenericPdf(posteriorName,"@0",*plike);
995 
996  return posteriorPdf;
997 }
998 
999 ////////////////////////////////////////////////////////////////////////////////
1000 /// When am approximate posterior is computed binninig the parameter of interest (poi) range
1001 /// (see SetScanOfPosteriors) an histogram is created and can be returned to the user
1002 /// A nullptr is instead returned when the posterior is computed without binning the poi.
1003 ///
1004 /// NOTE: the returned object is managed by the BayesianCalculator class,
1005 /// if the user wants to take ownership of the returned histogram, he needs to clone
1006 /// or copy the return object.
1007 
1008 TH1 * BayesianCalculator::GetPosteriorHistogram() const
1009 {
1010  return (fApproxPosterior) ? fApproxPosterior->GetHistogram() : nullptr;
1011 }
1012 
1013 
1014 ////////////////////////////////////////////////////////////////////////////////
1015 /// return a RooPlot with the posterior and the credibility region
1016 /// NOTE: User takes ownership of the returned object
1017 
1018 RooPlot* BayesianCalculator::GetPosteriorPlot(bool norm, double precision ) const
1019 {
1020 
1021  GetPosteriorFunction();
1022 
1023  // if a scan is requested approximate the posterior
1024  if (fNScanBins > 0)
1025  ApproximatePosterior();
1026 
1027  RooAbsReal * posterior = fIntegratedLikelihood;
1028  if (norm) {
1029  // delete and re-do always posterior pdf (could be invalid after approximating it)
1030  if (fPosteriorPdf) delete fPosteriorPdf;
1031  fPosteriorPdf = GetPosteriorPdf();
1032  posterior = fPosteriorPdf;
1033  }
1034  if (!posterior) return 0;
1035 
1036  if (!fValidInterval) GetInterval();
1037 
1038  RooAbsRealLValue* poi = dynamic_cast<RooAbsRealLValue*>( fPOI.first() );
1039  assert(poi);
1040 
1041 
1042  RooPlot* plot = poi->frame();
1043  if (!plot) return 0;
1044 
1045  // try to reduce some error messages
1046  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CountErrors);
1047 
1048  plot->SetTitle(TString("Posterior probability of parameter \"")+TString(poi->GetName())+TString("\""));
1049  posterior->plotOn(plot,RooFit::Range(fLower,fUpper,kFALSE),RooFit::VLines(),RooFit::DrawOption("F"),RooFit::MoveToBack(),RooFit::FillColor(kGray),RooFit::Precision(precision));
1050  posterior->plotOn(plot);
1051  plot->GetYaxis()->SetTitle("posterior function");
1052 
1053  // reset the counts and default mode
1054  RooAbsReal::clearEvalErrorLog();
1055  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors);
1056 
1057  return plot;
1058 }
1059 
1060 ////////////////////////////////////////////////////////////////////////////////
1061 /// set the integration type (possible type are) :
1062 ///
1063 /// - 1D integration ( used when only one nuisance and when the posterior is scanned):
1064 /// adaptive , gauss, nonadaptive
1065 /// - multidim:
1066 /// - ADAPTIVE, adaptive numerical integration
1067 /// The parameter numIters (settable with SetNumIters) is the max number of function calls.
1068 /// It can be reduced to make the integration faster but it will be difficult to reach the required tolerance
1069 /// - VEGAS MC integration method based on importance sampling - numIters is number of function calls
1070 /// Extra Vegas parameter can be set using IntegratorMultiDimOptions class
1071 /// - MISER MC integration method based on stratified sampling
1072 /// See also http://en.wikipedia.org/wiki/Monte_Carlo_integration for VEGAS and MISER description
1073 /// - PLAIN simple MC integration method, where the max number of calls can be specified using SetNumIters(numIters)
1074 ///
1075 /// Extra integration types are:
1076 ///
1077 /// - TOYMC:
1078 /// evaluate posterior by generating toy MC for the nuisance parameters. It is a MC
1079 /// integration, where the function is sampled according to the nuisance. It is convenient to use when all
1080 /// the nuisance are uncorrelated and it is efficient to generate them
1081 /// The toy are generated by default for each poi values
1082 /// (this method has been proposed and provided by J.P Chou)
1083 /// - 1-TOYMC : same method as before but in this case the toys are generated only one time and then used for
1084 /// each poi value. It can be convenient when the generation time is much larger than the evaluation time,
1085 /// otherwise it is recommended to re-generate the toy for each poi scanned point of the posterior function
1086 /// - ROOFIT:
1087 /// use roofit default integration methods which will produce a nested integral (not recommended for more
1088 /// than 1 nuisance parameters)
1089 
1090 void BayesianCalculator::SetIntegrationType(const char * type) {
1091  // if type = 0 use default specified via class IntegratorMultiDimOptions::SetDefaultIntegrator
1092  fIntegrationType = TString(type);
1093  fIntegrationType.ToUpper();
1094 }
1095 
1096 ////////////////////////////////////////////////////////////////////////////////
1097 /// Compute the interval. By Default a central interval is computed
1098 /// and the result is a SimpleInterval object.
1099 ///
1100 /// Using the method (to be called before SetInterval) SetLeftSideTailFraction the user can choose the type of interval.
1101 /// By default the returned interval is a central interval with the confidence level specified
1102 /// previously in the constructor ( LeftSideTailFraction = 0.5).
1103 /// - For lower limit use SetLeftSideTailFraction = 1
1104 /// - For upper limit use SetLeftSideTailFraction = 0
1105 /// - for shortest intervals use SetLeftSideTailFraction = -1 or call the method SetShortestInterval()
1106 ///
1107 /// NOTE: The BayesianCalculator covers only the case with one
1108 /// single parameter of interest
1109 ///
1110 /// NOTE: User takes ownership of the returned object
1111 
1112 SimpleInterval* BayesianCalculator::GetInterval() const
1113 {
1114 
1115  if (fValidInterval)
1116  coutW(Eval) << "BayesianCalculator::GetInterval - recomputing interval for the same CL and same model" << std::endl;
1117 
1118  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.first() );
1119  if (!poi) {
1120  coutE(Eval) << "BayesianCalculator::GetInterval - no parameter of interest is set " << std::endl;
1121  return 0;
1122  }
1123 
1124 
1125 
1126  // get integrated likelihood (posterior function)
1127  GetPosteriorFunction();
1128 
1129  //Bool_t silentMode = (RooMsgService::instance().globalKillBelow() >= RooFit::ERROR || RooMsgService::instance().silentMode()) ;
1130  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::CountErrors);
1131 
1132  if (fLeftSideFraction < 0 ) {
1133  // compute short intervals
1134  ComputeShortestInterval();
1135  }
1136  else {
1137  // compute the other intervals
1138 
1139  double lowerCutOff = fLeftSideFraction * fSize;
1140  double upperCutOff = 1. - (1.- fLeftSideFraction) * fSize;
1141 
1142 
1143  if (fNScanBins > 0) {
1144  ComputeIntervalFromApproxPosterior(lowerCutOff, upperCutOff);
1145  }
1146 
1147  else {
1148  // use integration method if there are nuisance parameters
1149  if (fNuisanceParameters.getSize() > 0) {
1150  ComputeIntervalFromCdf(lowerCutOff, upperCutOff);
1151  }
1152  else {
1153  // case of no nuisance - just use createCdf from roofit
1154  ComputeIntervalUsingRooFit(lowerCutOff, upperCutOff);
1155  }
1156  // case cdf failed (scan then the posterior)
1157  if (!fValidInterval) {
1158  fNScanBins = 100;
1159  coutW(Eval) << "BayesianCalculator::GetInterval - computing integral from cdf failed - do a scan in "
1160  << fNScanBins << " nbins " << std::endl;
1161  ComputeIntervalFromApproxPosterior(lowerCutOff, upperCutOff);
1162  }
1163  }
1164  }
1165 
1166 
1167  // reset the counts and default mode
1168  if (RooAbsReal::numEvalErrors() > 0)
1169  coutW(Eval) << "BayesianCalculator::GetInterval : " << RooAbsReal::numEvalErrors() << " errors reported in evaluating log-likelihood function "
1170  << std::endl;
1171 
1172  RooAbsReal::clearEvalErrorLog();
1173  RooAbsReal::setEvalErrorLoggingMode(RooAbsReal::PrintErrors);
1174 
1175  if (!fValidInterval) {
1176  fLower = 1; fUpper = 0;
1177  coutE(Eval) << "BayesianCalculator::GetInterval - cannot compute a valid interval - return a dummy [1,0] interval"
1178  << std::endl;
1179  }
1180  else {
1181  coutI(Eval) << "BayesianCalculator::GetInterval - found a valid interval : [" << fLower << " , "
1182  << fUpper << " ]" << std::endl;
1183  }
1184 
1185  TString interval_name = TString("BayesianInterval_a") + TString(this->GetName());
1186  SimpleInterval * interval = new SimpleInterval(interval_name,*poi,fLower,fUpper,ConfidenceLevel());
1187  interval->SetTitle("SimpleInterval from BayesianCalculator");
1188 
1189  return interval;
1190 }
1191 
1192 ////////////////////////////////////////////////////////////////////////////////
1193 /// Returns the value of the parameter for the point in
1194 /// parameter-space that is the most likely.
1195 /// How do we do if there are points that are equi-probable?
1196 /// use approximate posterior
1197 /// t.b.d use real function to find the mode
1198 
1199 double BayesianCalculator::GetMode() const {
1200 
1201  ApproximatePosterior();
1202  TH1 * h = fApproxPosterior->GetHistogram();
1203  return h->GetBinCenter(h->GetMaximumBin() );
1204  //return fApproxPosterior->GetMaximumX();
1205 }
1206 
1207 ////////////////////////////////////////////////////////////////////////////////
1208 /// internal function compute the interval using RooFit
1209 
1210 void BayesianCalculator::ComputeIntervalUsingRooFit(double lowerCutOff, double upperCutOff ) const {
1211 
1212  coutI(Eval) << "BayesianCalculator: Compute interval using RooFit: posteriorPdf + createCdf + RooBrentRootFinder " << std::endl;
1213 
1214  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.first() );
1215  assert(poi);
1216 
1217  fValidInterval = false;
1218  if (!fPosteriorPdf) fPosteriorPdf = (RooAbsPdf*) GetPosteriorPdf();
1219  if (!fPosteriorPdf) return;
1220 
1221  RooAbsReal* cdf = fPosteriorPdf->createCdf(fPOI,RooFit::ScanNoCdf());
1222  if (!cdf) return;
1223 
1224  RooAbsFunc* cdf_bind = cdf->bindVars(fPOI,&fPOI);
1225  if (!cdf_bind) return;
1226 
1227  RooBrentRootFinder brf(*cdf_bind);
1228  brf.setTol(fBrfPrecision); // set the brf precision
1229 
1230  double tmpVal = poi->getVal(); // patch used because findRoot changes the value of poi
1231 
1232  bool ret = true;
1233  if (lowerCutOff > 0) {
1234  double y = lowerCutOff;
1235  ret &= brf.findRoot(fLower,poi->getMin(),poi->getMax(),y);
1236  }
1237  else
1238  fLower = poi->getMin();
1239 
1240  if (upperCutOff < 1.0) {
1241  double y=upperCutOff;
1242  ret &= brf.findRoot(fUpper,poi->getMin(),poi->getMax(),y);
1243  }
1244  else
1245  fUpper = poi->getMax();
1246  if (!ret) coutE(Eval) << "BayesianCalculator::GetInterval "
1247  << "Error returned from Root finder, estimated interval is not fully correct"
1248  << std::endl;
1249  else
1250  fValidInterval = true;
1251 
1252 
1253  poi->setVal(tmpVal); // patch: restore the original value of poi
1254 
1255  delete cdf_bind;
1256  delete cdf;
1257 }
1258 
1259 ////////////////////////////////////////////////////////////////////////////////
1260 /// internal function compute the interval using Cdf integration
1261 
1262 void BayesianCalculator::ComputeIntervalFromCdf(double lowerCutOff, double upperCutOff ) const {
1263 
1264  fValidInterval = false;
1265 
1266  coutI(InputArguments) << "BayesianCalculator:GetInterval Compute the interval from the posterior cdf " << std::endl;
1267 
1268  RooRealVar* poi = dynamic_cast<RooRealVar*>( fPOI.first() );
1269  assert(poi);
1270  if (GetPosteriorFunction() == 0) {
1271  coutE(InputArguments) << "BayesianCalculator::GetInterval() cannot make posterior Function " << std::endl;
1272  return;
1273  }
1274 
1275  // need to remove the constant parameters
1276  RooArgList bindParams;
1277  bindParams.add(fPOI);
1278  bindParams.add(fNuisanceParameters);
1279 
1280  // this code could be put inside the PosteriorCdfFunction
1281 
1282  //bindParams.Print("V");
1283 
1284  PosteriorCdfFunction cdf(*fLogLike, bindParams, fPriorPdf, fIntegrationType, fNLLMin );
1285  if( cdf.HasError() ) {
1286  coutE(Eval) << "BayesianCalculator: Numerical error computing CDF integral - try a different method " << std::endl;
1287  return;
1288  }
1289 
1290  //find the roots
1291 
1292  ROOT::Math::RootFinder rf(kRootFinderType);
1293 
1294  ccoutD(Eval) << "BayesianCalculator::GetInterval - finding roots of posterior using RF " << rf.Name()
1295  << " with precision = " << fBrfPrecision;
1296 
1297  if (lowerCutOff > 0) {
1298  cdf.SetOffset(lowerCutOff);
1299  ccoutD(NumIntegration) << "Integrating posterior to get cdf and search lower limit at p =" << lowerCutOff << std::endl;
1300  bool ok = rf.Solve(cdf, poi->getMin(),poi->getMax() , 200,fBrfPrecision, fBrfPrecision);
1301  if( cdf.HasError() )
1302  coutW(Eval) << "BayesianCalculator: Numerical error integrating the CDF " << std::endl;
1303  if (!ok) {
1304  coutE(NumIntegration) << "BayesianCalculator::GetInterval - Error from root finder when searching lower limit !" << std::endl;
1305  return;
1306  }
1307  fLower = rf.Root();
1308  }
1309  else {
1310  fLower = poi->getMin();
1311  }
1312  if (upperCutOff < 1.0) {
1313  cdf.SetOffset(upperCutOff);
1314  ccoutD(NumIntegration) << "Integrating posterior to get cdf and search upper interval limit at p =" << upperCutOff << std::endl;
1315  bool ok = rf.Solve(cdf, fLower,poi->getMax() , 200, fBrfPrecision, fBrfPrecision);
1316  if( cdf.HasError() )
1317  coutW(Eval) << "BayesianCalculator: Numerical error integrating the CDF " << std::endl;
1318  if (!ok) {
1319  coutE(NumIntegration) << "BayesianCalculator::GetInterval - Error from root finder when searching upper limit !" << std::endl;
1320  return;
1321  }
1322  fUpper = rf.Root();
1323  }
1324  else {
1325  fUpper = poi->getMax();
1326  }
1327 
1328  fValidInterval = true;
1329 }
1330 
1331 ////////////////////////////////////////////////////////////////////////////////
1332 /// approximate posterior in nbins using a TF1
1333 /// scan the poi values and evaluate the posterior at each point
1334 /// and save the result in a cloned TF1
1335 /// For each point the posterior is evaluated by integrating the nuisance
1336 /// parameters
1337 
1338 void BayesianCalculator::ApproximatePosterior() const {
1339 
1340  if (fApproxPosterior) {
1341  // if number of bins of existing function is >= requested one - no need to redo the scan
1342  if (fApproxPosterior->GetNpx() >= fNScanBins) return;
1343  // otherwise redo the scan
1344  delete fApproxPosterior;
1345  fApproxPosterior = 0;
1346  }
1347 
1348 
1349  RooAbsReal * posterior = GetPosteriorFunction();
1350  if (!posterior) return;
1351 
1352 
1353  TF1 * tmp = posterior->asTF(fPOI);
1354  assert(tmp != 0);
1355  // binned the function in nbins and evaluate at those points
1356  if (fNScanBins > 0) tmp->SetNpx(fNScanBins); // if not use default of TF1 (which is 100)
1357 
1358  coutI(Eval) << "BayesianCalculator - scan posterior function in nbins = " << tmp->GetNpx() << std::endl;
1359 
1360  fApproxPosterior = (TF1*) tmp->Clone();
1361  // save this function for future reuse
1362  // I can delete now original posterior and use this approximated copy
1363  delete tmp;
1364  TString name = posterior->GetName() + TString("_approx");
1365  TString title = posterior->GetTitle() + TString("_approx");
1366  RooAbsReal * posterior2 = new RooTFnBinding(name,title,fApproxPosterior,fPOI);
1367  if (posterior == fIntegratedLikelihood) {
1368  delete fIntegratedLikelihood;
1369  fIntegratedLikelihood = posterior2;
1370  }
1371  else if (posterior == fLikelihood) {
1372  delete fLikelihood;
1373  fLikelihood = posterior2;
1374  }
1375  else {
1376  assert(1); // should never happen this case
1377  }
1378 }
1379 
1380 ////////////////////////////////////////////////////////////////////////////////
1381 /// compute the interval using the approximate posterior function
1382 
1383 void BayesianCalculator::ComputeIntervalFromApproxPosterior(double lowerCutOff, double upperCutOff ) const {
1384 
1385  ccoutD(Eval) << "BayesianCalculator: Compute interval from the approximate posterior " << std::endl;
1386 
1387  ApproximatePosterior();
1388  if (!fApproxPosterior) return;
1389 
1390  double prob[2];
1391  double limits[2] = {0,0};
1392  prob[0] = lowerCutOff;
1393  prob[1] = upperCutOff;
1394  fApproxPosterior->GetQuantiles(2,limits,prob);
1395  fLower = limits[0];
1396  fUpper = limits[1];
1397  fValidInterval = true;
1398 }
1399 
1400 ////////////////////////////////////////////////////////////////////////////////
1401 /// compute the shortest interval from the histogram representing the posterior
1402 
1403 
1404 void BayesianCalculator::ComputeShortestInterval( ) const {
1405  coutI(Eval) << "BayesianCalculator - computing shortest interval with CL = " << 1.-fSize << std::endl;
1406 
1407  // compute via the approx posterior function
1408  ApproximatePosterior();
1409  if (!fApproxPosterior) return;
1410 
1411  TH1D * h1 = dynamic_cast<TH1D*>(fApproxPosterior->GetHistogram() );
1412  assert(h1 != 0);
1413  h1->SetName(fApproxPosterior->GetName());
1414  // get bins and sort them
1415  double * bins = h1->GetArray();
1416  // exclude under/overflow bins
1417  int n = h1->GetSize()-2;
1418  std::vector<int> index(n);
1419  // exclude bins[0] (the underflow bin content)
1420  TMath::Sort(n, bins+1, &index[0]);
1421  // find cut off as test size
1422  double sum = 0;
1423  double actualCL = 0;
1424  double upper = h1->GetXaxis()->GetXmin();
1425  double lower = h1->GetXaxis()->GetXmax();
1426  double norm = h1->GetSumOfWeights();
1427 
1428  for (int i = 0; i < n; ++i) {
1429  int idx = index[i];
1430  double p = bins[ idx] / norm;
1431  sum += p;
1432  if (sum > 1.-fSize ) {
1433  actualCL = sum - p;
1434  break;
1435  }
1436 
1437  // histogram bin content starts from 1
1438  if ( h1->GetBinLowEdge(idx+1) < lower)
1439  lower = h1->GetBinLowEdge(idx+1);
1440  if ( h1->GetXaxis()->GetBinUpEdge(idx+1) > upper)
1441  upper = h1->GetXaxis()->GetBinUpEdge(idx+1);
1442  }
1443 
1444  ccoutD(Eval) << "BayesianCalculator::ComputeShortestInterval - actual interval CL = "
1445  << actualCL << " difference from requested is " << (actualCL-(1.-fSize))/fSize*100. << "% "
1446  << " limits are [ " << lower << " , " << " upper ] " << std::endl;
1447 
1448 
1449  if (lower < upper) {
1450  fLower = lower;
1451  fUpper = upper;
1452 
1453 
1454 
1455  if ( std::abs(actualCL-(1.-fSize)) > 0.1*(1.-fSize) )
1456  coutW(Eval) << "BayesianCalculator::ComputeShortestInterval - actual interval CL = "
1457  << actualCL << " differs more than 10% from desired CL value - must increase nbins "
1458  << n << " to an higher value " << std::endl;
1459  }
1460  else
1461  coutE(Eval) << "BayesianCalculator::ComputeShortestInterval " << n << " bins are not sufficient " << std::endl;
1462 
1463  fValidInterval = true;
1464 
1465 }
1466 
1467 
1468 
1469 
1470 } // end namespace RooStats