Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
ProfileLikelihoodCalculator.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer 28/07/2008
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class RooStats::ProfileLikelihoodCalculator
13  \ingroup Roostats
14 
15 The ProfileLikelihoodCalculator is a concrete implementation of CombinedCalculator
16 (the interface class for tools which can produce both a RooStats HypoTestResult
17 and ConfInterval). The tool uses the profile likelihood ratio as a test statistic,
18 and assumes that Wilks' theorem is valid. Wilks' theorem states that \f$ -2 \cdot \ln(\lambda) \f$
19 (profile likelihood ratio) is asymptotically distributed as a \f$ \chi^2 \f$ distribution
20 with \f$ N \f$ degrees of freedom. Thus, \f$p\f$-values can be
21 constructed, and the profile likelihood ratio can be used to construct a
22 LikelihoodInterval. (In the future, this class could be extended to use toy
23 Monte Carlo to calibrate the distribution of the test statistic).
24 
25 Usage: It uses the interface of the CombinedCalculator, so that it can be
26 configured by specifying:
27 
28  - A model common model (*e.g.* a family of specific models, which includes both
29  the null and alternate)
30  - A data set
31  - A set of parameters of interest. The nuisance parameters will be all other
32  parameters of the model.
33  - A set of parameters which specify the null hypothesis (including values
34  and const/non-const status).
35 
36 The interface allows one to pass the model, data, and parameters either directly
37 or via a ModelConfig class. The alternate hypothesis leaves the parameter free
38 to take any value other than those specified by the null hypothesis. There is
39 therefore no need to specify the alternate parameters.
40 
41 After configuring the calculator, one only needs to call GetHypoTest() (which
42 will return a HypoTestResult pointer) or GetInterval() (which will return a
43 ConfInterval pointer).
44 
45 This calculator can work with both one-dimensional intervals or multi-
46 dimensional ones (contours).
47 
48 Note that for hypothesis tests, it is often better to use the
49 AsymptoticCalculator, which can compute in addition the expected
50 \f$p\f$-value using an Asimov data set.
51 
52 */
53 
55 
56 #include "RooStats/RooStatsUtils.h"
57 
60 
61 #include "RooFitResult.h"
62 #include "RooRealVar.h"
63 #include "RooProfileLL.h"
64 #include "RooNLLVar.h"
65 #include "RooGlobalFunc.h"
66 #include "RooMsgService.h"
67 
68 #include "Math/MinimizerOptions.h"
69 #include "RooMinimizer.h"
70 //#include "RooProdPdf.h"
71 
72 using namespace std;
73 
74 ClassImp(RooStats::ProfileLikelihoodCalculator); ;
75 
76 using namespace RooFit;
77 using namespace RooStats;
78 
79 
80 ////////////////////////////////////////////////////////////////////////////////
81 /// default constructor
82 
83 ProfileLikelihoodCalculator::ProfileLikelihoodCalculator() :
84  CombinedCalculator(), fFitResult(0), fGlobalFitDone(false)
85 {
86 }
87 
88 ProfileLikelihoodCalculator::ProfileLikelihoodCalculator(RooAbsData& data, RooAbsPdf& pdf, const RooArgSet& paramsOfInterest,
89  Double_t size, const RooArgSet* nullParams ) :
90  CombinedCalculator(data,pdf, paramsOfInterest, size, nullParams ),
91  fFitResult(0), fGlobalFitDone(false)
92 {
93  // constructor from pdf and parameters
94  // the pdf must contain eventually the nuisance parameters
95 }
96 
97 ProfileLikelihoodCalculator::ProfileLikelihoodCalculator(RooAbsData& data, ModelConfig& model, Double_t size) :
98  CombinedCalculator(data, model, size),
99  fFitResult(0), fGlobalFitDone(false)
100 {
101  // construct from a ModelConfig. Assume data model.GetPdf() will provide full description of model including
102  // constraint term on the nuisances parameters
103  assert(model.GetPdf() );
104 }
105 
106 
107 ////////////////////////////////////////////////////////////////////////////////
108 /// destructor
109 /// cannot delete prod pdf because it will delete all the composing pdf's
110 /// if (fOwnPdf) delete fPdf;
111 /// fPdf = 0;
112 
113 ProfileLikelihoodCalculator::~ProfileLikelihoodCalculator(){
114  if (fFitResult) delete fFitResult;
115 }
116 
117 void ProfileLikelihoodCalculator::DoReset() const {
118  // reset and clear fit result
119  // to be called when a new model or data are set in the calculator
120  if (fFitResult) delete fFitResult;
121  fFitResult = 0;
122 }
123 
124 RooAbsReal * ProfileLikelihoodCalculator::DoGlobalFit() const {
125  // perform a global fit of the likelihood letting with all parameter of interest and
126  // nuisance parameters
127  // keep the list of fitted parameters
128 
129  DoReset();
130  RooAbsPdf * pdf = GetPdf();
131  RooAbsData* data = GetData();
132  if (!data || !pdf ) return 0;
133 
134  // get all non-const parameters
135  RooArgSet* constrainedParams = pdf->getParameters(*data);
136  if (!constrainedParams) return 0;
137  RemoveConstantParameters(constrainedParams);
138 
139  const auto& config = GetGlobalRooStatsConfig();
140  RooAbsReal * nll = pdf->createNLL(*data, CloneData(true), Constrain(*constrainedParams),ConditionalObservables(fConditionalObs), GlobalObservables(fGlobalObs),
141  RooFit::Offset(config.useLikelihoodOffset) );
142 
143  // check if global fit has been already done
144  if (fFitResult && fGlobalFitDone) {
145  delete constrainedParams;
146  return nll;
147  }
148 
149  // calculate MLE
150  oocoutP((TObject*)0,Minimization) << "ProfileLikelihoodCalcultor::DoGLobalFit - find MLE " << std::endl;
151 
152  if (fFitResult) delete fFitResult;
153  fFitResult = DoMinimizeNLL(nll);
154 
155  // print fit result
156  if (fFitResult) {
157  fFitResult->printStream( oocoutI((TObject*)0,Minimization), fFitResult->defaultPrintContents(0), fFitResult->defaultPrintStyle(0) );
158 
159  if (fFitResult->status() != 0)
160  oocoutW((TObject*)0,Minimization) << "ProfileLikelihoodCalcultor::DoGlobalFit - Global fit failed - status = " << fFitResult->status() << std::endl;
161  else
162  fGlobalFitDone = true;
163  }
164 
165  delete constrainedParams;
166  return nll;
167 }
168 
169 RooFitResult * ProfileLikelihoodCalculator::DoMinimizeNLL(RooAbsReal * nll) {
170  // Minimizer the given NLL using the default options
171 
172  const char * minimType = ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str();
173  const char * minimAlgo = ROOT::Math::MinimizerOptions::DefaultMinimizerAlgo().c_str();
174  int strategy = ROOT::Math::MinimizerOptions::DefaultStrategy();
175  int level = ROOT::Math::MinimizerOptions::DefaultPrintLevel() -1;// RooFit level starts from -1
176  int tolerance = ROOT::Math::MinimizerOptions::DefaultTolerance();
177  oocoutP((TObject*)0,Minimization) << "ProfileLikelihoodCalcultor::DoMinimizeNLL - using " << minimType << " / " << minimAlgo << " with strategy " << strategy << std::endl;
178  // do global fit and store fit result for further use
179 
180  const auto& config = GetGlobalRooStatsConfig();
181 
182  RooMinimizer minim(*nll);
183  minim.setStrategy(strategy);
184  minim.setEps(tolerance);
185  minim.setPrintLevel(level);
186  minim.optimizeConst(2); // to optimize likelihood calculations
187  minim.setEvalErrorWall(config.useEvalErrorWall);
188 
189  int status = -1;
190  for (int tries = 1, maxtries = 4; tries <= maxtries; ++tries) {
191  status = minim.minimize(minimType,minimAlgo);
192  if (status%1000 == 0) { // ignore erros from Improve
193  break;
194  } else if (tries < maxtries) {
195  cout << " ----> Doing a re-scan first" << endl;
196  minim.minimize(minimType,"Scan");
197  if (tries == 2) {
198  if (strategy == 0 ) {
199  cout << " ----> trying with strategy = 1" << endl;;
200  minim.setStrategy(1);
201  }
202  else
203  tries++; // skip this trial if strategy is already 1
204  }
205  if (tries == 3) {
206  cout << " ----> trying with improve" << endl;;
207  minimType = "Minuit";
208  minimAlgo = "migradimproved";
209  }
210  }
211  }
212 
213  RooFitResult * result = minim.save();
214 
215 
216  return result;
217 }
218 
219 ////////////////////////////////////////////////////////////////////////////////
220 /// Main interface to get a RooStats::ConfInterval.
221 /// It constructs a profile likelihood ratio, and uses that to construct a RooStats::LikelihoodInterval.
222 
223 LikelihoodInterval* ProfileLikelihoodCalculator::GetInterval() const {
224 // RooAbsPdf* pdf = fWS->pdf(fPdfName);
225 // RooAbsData* data = fWS->data(fDataName);
226  RooAbsPdf * pdf = GetPdf();
227  RooAbsData* data = GetData();
228  if (!data || !pdf || fPOI.getSize() == 0) return 0;
229 
230  RooArgSet* constrainedParams = pdf->getParameters(*data);
231  RemoveConstantParameters(constrainedParams);
232 
233 
234  /*
235  RooNLLVar* nll = new RooNLLVar("nll","",*pdf,*data, Extended(),Constrain(*constrainedParams));
236  RooProfileLL* profile = new RooProfileLL("pll","",*nll, *fPOI);
237  profile->addOwnedComponents(*nll) ; // to avoid memory leak
238  */
239 
240  // do a global fit cloning the data
241  RooAbsReal * nll = DoGlobalFit();
242  if (!nll) return 0;
243 
244  if (!fFitResult) {
245  delete nll;
246  return 0;
247  }
248 
249  RooAbsReal* profile = nll->createProfile(fPOI);
250  profile->addOwnedComponents(*nll) ; // to avoid memory leak
251 
252  // t.b.f. " RooProfileLL should keep and provide possibility to query on global minimum
253  // set POI to fit value (this will speed up profileLL calculation of global minimum)
254  const RooArgList & fitParams = fFitResult->floatParsFinal();
255  for (int i = 0; i < fitParams.getSize(); ++i) {
256  RooRealVar & fitPar = (RooRealVar &) fitParams[i];
257  RooRealVar * par = (RooRealVar*) fPOI.find( fitPar.GetName() );
258  if (par) {
259  par->setVal( fitPar.getVal() );
260  par->setError( fitPar.getError() );
261  }
262  }
263 
264  // do this so profile will cache inside the absolute minimum and
265  // minimum values of nuisance parameters
266  // (no need to this here)
267  // profile->getVal();
268  //RooMsgService::instance().setGlobalKillBelow(RooFit::DEBUG) ;
269  // profile->Print();
270 
271  TString name = TString("LikelihoodInterval_");// + TString(GetName() );
272 
273  // make a list of fPOI with fit result values and pass to LikelihoodInterval class
274  // bestPOI is a cloned list of POI only with their best fit values
275  TIter iter = fPOI.createIterator();
276  RooArgSet fitParSet(fitParams);
277  RooArgSet * bestPOI = new RooArgSet();
278  while (RooAbsArg * arg = (RooAbsArg*) iter.Next() ) {
279  RooAbsArg * p = fitParSet.find( arg->GetName() );
280  if (p) bestPOI->addClone(*p);
281  else bestPOI->addClone(*arg);
282  }
283  // fPOI contains the parameter of interest of the PL object
284  // and bestPOI contains a snapshot with the best fit values
285  LikelihoodInterval* interval = new LikelihoodInterval(name, profile, &fPOI, bestPOI);
286  interval->SetConfidenceLevel(1.-fSize);
287  delete constrainedParams;
288  return interval;
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// Main interface to get a HypoTestResult.
293 /// It does two fits:
294 /// 1. The first lets the null parameters float, so it's a maximum likelihood estimate.
295 /// 2. The second is to the null model (fixing null parameters to their specified values): *e.g.* a conditional maximum likelihood.
296 /// Since not all parameters are floating, this likelihood will be lower than the unconditional model.
297 ///
298 /// The ratio of the likelihood obtained from the conditional MLE to the MLE is the profile likelihood ratio.
299 /// Wilks' theorem is used to get \f$p\f$-values.
300 
301 HypoTestResult* ProfileLikelihoodCalculator::GetHypoTest() const {
302 // RooAbsPdf* pdf = fWS->pdf(fPdfName);
303 // RooAbsData* data = fWS->data(fDataName);
304  RooAbsPdf * pdf = GetPdf();
305  RooAbsData* data = GetData();
306 
307 
308  if (!data || !pdf) return 0;
309 
310  if (fNullParams.getSize() == 0) return 0;
311 
312  // make a clone and ordered list since a vector will be associated to keep parameter values
313  // clone the list since first fit will changes the fNullParams values
314  RooArgList poiList;
315  poiList.addClone(fNullParams); // make a clone list
316 
317 
318  // do a global fit
319  RooAbsReal * nll = DoGlobalFit();
320  if (!nll) return 0;
321 
322  if (!fFitResult) {
323  delete nll;
324  return 0;
325  }
326 
327  RooArgSet* constrainedParams = pdf->getParameters(*data);
328  RemoveConstantParameters(constrainedParams);
329 
330  Double_t nLLatMLE = fFitResult->minNll();
331  // in case of using offset need to save offset value
332  Double_t nlloffset = (RooStats::IsNLLOffset() ) ? nll->getVal() - nLLatMLE : 0;
333 
334  // set POI to given values, set constant, calculate conditional MLE
335  std::vector<double> oldValues(poiList.getSize() );
336  for (unsigned int i = 0; i < oldValues.size(); ++i) {
337  RooRealVar * mytarget = (RooRealVar*) constrainedParams->find(poiList[i].GetName());
338  if (mytarget) {
339  oldValues[i] = mytarget->getVal();
340  mytarget->setVal( ( (RooRealVar&) poiList[i] ).getVal() );
341  mytarget->setConstant(kTRUE);
342  }
343  }
344 
345 
346 
347  // perform the fit only if nuisance parameters are available
348  // get nuisance parameters
349  // nuisance parameters are the non const parameters from the likelihood parameters
350  RooArgSet nuisParams(*constrainedParams);
351 
352  // need to remove the parameter of interest
353  RemoveConstantParameters(&nuisParams);
354 
355  // check there are variable parameter in order to do a fit
356  bool existVarParams = false;
357  TIter it = nuisParams.createIterator();
358  RooRealVar * myarg = 0;
359  while ((myarg = (RooRealVar *)it.Next())) {
360  if ( !myarg->isConstant() ) {
361  existVarParams = true;
362  break;
363  }
364  }
365 
366  Double_t nLLatCondMLE = nLLatMLE;
367  if (existVarParams) {
368  oocoutP((TObject*)0,Minimization) << "ProfileLikelihoodCalcultor::GetHypoTest - do conditional fit " << std::endl;
369 
370  RooFitResult * fit2 = DoMinimizeNLL(nll);
371 
372  // print fit result
373  if (fit2) {
374  nLLatCondMLE = fit2->minNll();
375  fit2->printStream( oocoutI((TObject*)0,Minimization), fit2->defaultPrintContents(0), fit2->defaultPrintStyle(0) );
376 
377  if (fit2->status() != 0)
378  oocoutW((TObject*)0,Minimization) << "ProfileLikelihoodCalcultor::GetHypotest - Conditional fit failed - status = " << fit2->status() << std::endl;
379  }
380 
381  }
382  else {
383  // get just the likelihood value (no need to do a fit since the likelihood is a constant function)
384  nLLatCondMLE = nll->getVal();
385  // this value contains the offset
386  if (RooStats::IsNLLOffset() ) nLLatCondMLE -= nlloffset;
387  }
388 
389  // Use Wilks' theorem to translate -2 log lambda into a significance/p-value
390  Double_t deltaNLL = std::max( nLLatCondMLE-nLLatMLE, 0.);
391 
392  // get number of free parameter of interest
393  RemoveConstantParameters(poiList);
394  int ndf = poiList.getSize();
395 
396  Double_t pvalue = ROOT::Math::chisquared_cdf_c( 2* deltaNLL, ndf);
397 
398  // in case of one dimension (1 poi) do the one-sided p-value (need to divide by 2)
399  if (ndf == 1) pvalue = 0.5 * pvalue;
400 
401  TString name = TString("ProfileLRHypoTestResult_");// + TString(GetName() );
402  HypoTestResult* htr = new HypoTestResult(name, pvalue, 0 );
403 
404  // restore previous value of poi
405  for (unsigned int i = 0; i < oldValues.size(); ++i) {
406  RooRealVar * mytarget = (RooRealVar*) constrainedParams->find(poiList[i].GetName());
407  if (mytarget) {
408  mytarget->setVal(oldValues[i] );
409  mytarget->setConstant(false);
410  }
411  }
412 
413  delete constrainedParams;
414  delete nll;
415  return htr;
416 
417 }