Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
StandardHypoTestDemo.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook
4 /// Standard tutorial macro for hypothesis test (for computing the discovery significance) using all
5 /// RooStats hypotheiss tests calculators and test statistics.
6 ///
7 /// Usage:
8 ///
9 /// ~~~{.cpp}
10 /// root>.L StandardHypoTestDemo.C
11 /// root> StandardHypoTestDemo("fileName","workspace name","S+B modelconfig name","B model name","data set
12 /// name",calculator type, test statistic type, number of toys)
13 ///
14 /// type = 0 Freq calculator
15 /// type = 1 Hybrid calculator
16 /// type = 2 Asymptotic calculator
17 /// type = 3 Asymptotic calculator using nominal Asimov data sets (not using fitted parameter values but nominal ones)
18 ///
19 /// testStatType = 0 LEP
20 /// = 1 Tevatron
21 /// = 2 Profile Likelihood
22 /// = 3 Profile Likelihood one sided (i.e. = 0 if mu_hat < 0)
23 /// ~~~
24 ///
25 /// \macro_image
26 /// \macro_output
27 /// \macro_code
28 ///
29 /// \author Lorenzo Moneta
30 
31 #include "TFile.h"
32 #include "RooWorkspace.h"
33 #include "RooAbsPdf.h"
34 #include "RooRealVar.h"
35 #include "RooDataSet.h"
36 #include "RooStats/ModelConfig.h"
37 #include "RooRandom.h"
38 #include "TGraphErrors.h"
39 #include "TGraphAsymmErrors.h"
40 #include "TCanvas.h"
41 #include "TLine.h"
42 #include "TSystem.h"
43 #include "TROOT.h"
44 
48 #include "RooStats/ToyMCSampler.h"
49 #include "RooStats/HypoTestPlot.h"
50 
56 
60 
61 #include <cassert>
62 
63 using namespace RooFit;
64 using namespace RooStats;
65 
66 struct HypoTestOptions {
67 
68  bool noSystematics = false; // force all systematics to be off (i.e. set all nuisance parameters as constat
69  double nToysRatio = 4; // ratio Ntoys Null/ntoys ALT
70  double poiValue = -1; // change poi snapshot value for S+B model (needed for expected p0 values)
71  int printLevel = 0;
72  bool generateBinned = false; // for binned generation
73  bool useProof = false; // use Proof
74  bool enableDetailedOutput = false; // for detailed output
75 };
76 
77 HypoTestOptions optHT;
78 
79 void StandardHypoTestDemo(const char *infile = "", const char *workspaceName = "combined",
80  const char *modelSBName = "ModelConfig", const char *modelBName = "",
81  const char *dataName = "obsData", int calcType = 0, /* 0 freq 1 hybrid, 2 asymptotic */
82  int testStatType = 3, /* 0 LEP, 1 TeV, 2 LHC, 3 LHC - one sided*/
83  int ntoys = 5000, bool useNC = false, const char *nuisPriorName = 0)
84 {
85 
86  bool noSystematics = optHT.noSystematics;
87  double nToysRatio = optHT.nToysRatio; // ratio Ntoys Null/ntoys ALT
88  double poiValue = optHT.poiValue; // change poi snapshot value for S+B model (needed for expected p0 values)
89  int printLevel = optHT.printLevel;
90  bool generateBinned = optHT.generateBinned; // for binned generation
91  bool useProof = optHT.useProof; // use Proof
92  bool enableDetOutput = optHT.enableDetailedOutput;
93 
94  // Other Parameter to pass in tutorial
95  // apart from standard for filename, ws, modelconfig and data
96 
97  // type = 0 Freq calculator
98  // type = 1 Hybrid calculator
99  // type = 2 Asymptotic calculator
100 
101  // testStatType = 0 LEP
102  // = 1 Tevatron
103  // = 2 Profile Likelihood
104  // = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
105 
106  // ntoys: number of toys to use
107 
108  // useNumberCounting: set to true when using number counting events
109 
110  // nuisPriorName: name of prior for the nuisance. This is often expressed as constraint term in the global model
111  // It is needed only when using the HybridCalculator (type=1)
112  // If not given by default the prior pdf from ModelConfig is used.
113 
114  // extra options are available as global parameters of the macro. They major ones are:
115 
116  // generateBinned generate binned data sets for toys (default is false) - be careful not to activate with
117  // a too large (>=3) number of observables
118  // nToyRatio ratio of S+B/B toys (default is 2)
119  // printLevel
120 
121  // disable - can cause some problems
122  // ToyMCSampler::SetAlwaysUseMultiGen(true);
123 
124  SimpleLikelihoodRatioTestStat::SetAlwaysReuseNLL(true);
125  ProfileLikelihoodTestStat::SetAlwaysReuseNLL(true);
126  RatioOfProfiledLikelihoodsTestStat::SetAlwaysReuseNLL(true);
127 
128  // RooRandom::randomGenerator()->SetSeed(0);
129 
130  // to change minimizers
131  // ~~~{.bash}
132  // ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
133  // ROOT::Math::MinimizerOptions::SetDefaultMinimizer("Minuit2");
134  // ROOT::Math::MinimizerOptions::SetDefaultTolerance(1);
135  // ~~~
136 
137  // -------------------------------------------------------
138  // First part is just to access a user-defined file
139  // or create the standard example file if it doesn't exist
140  const char *filename = "";
141  if (!strcmp(infile, "")) {
142  filename = "results/example_combined_GaussExample_model.root";
143  bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
144  // if file does not exists generate with histfactory
145  if (!fileExist) {
146 #ifdef _WIN32
147  cout << "HistFactory file cannot be generated on Windows - exit" << endl;
148  return;
149 #endif
150  // Normally this would be run on the command line
151  cout << "will run standard hist2workspace example" << endl;
152  gROOT->ProcessLine(".! prepareHistFactory .");
153  gROOT->ProcessLine(".! hist2workspace config/example.xml");
154  cout << "\n\n---------------------" << endl;
155  cout << "Done creating example input" << endl;
156  cout << "---------------------\n\n" << endl;
157  }
158 
159  } else
160  filename = infile;
161 
162  // Try to open the file
163  TFile *file = TFile::Open(filename);
164 
165  // if input file was specified byt not found, quit
166  if (!file) {
167  cout << "StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
168  return;
169  }
170 
171  // -------------------------------------------------------
172  // Tutorial starts here
173  // -------------------------------------------------------
174 
175  // get the workspace out of the file
176  RooWorkspace *w = (RooWorkspace *)file->Get(workspaceName);
177  if (!w) {
178  cout << "workspace not found" << endl;
179  return;
180  }
181  w->Print();
182 
183  // get the modelConfig out of the file
184  ModelConfig *sbModel = (ModelConfig *)w->obj(modelSBName);
185 
186  // get the modelConfig out of the file
187  RooAbsData *data = w->data(dataName);
188 
189  // make sure ingredients are found
190  if (!data || !sbModel) {
191  w->Print();
192  cout << "data or ModelConfig was not found" << endl;
193  return;
194  }
195  // make b model
196  ModelConfig *bModel = (ModelConfig *)w->obj(modelBName);
197 
198  // case of no systematics
199  // remove nuisance parameters from model
200  if (noSystematics) {
201  const RooArgSet *nuisPar = sbModel->GetNuisanceParameters();
202  if (nuisPar && nuisPar->getSize() > 0) {
203  std::cout << "StandardHypoTestInvDemo"
204  << " - Switch off all systematics by setting them constant to their initial values" << std::endl;
205  RooStats::SetAllConstant(*nuisPar);
206  }
207  if (bModel) {
208  const RooArgSet *bnuisPar = bModel->GetNuisanceParameters();
209  if (bnuisPar)
210  RooStats::SetAllConstant(*bnuisPar);
211  }
212  }
213 
214  if (!bModel) {
215  Info("StandardHypoTestInvDemo", "The background model %s does not exist", modelBName);
216  Info("StandardHypoTestInvDemo", "Copy it from ModelConfig %s and set POI to zero", modelSBName);
217  bModel = (ModelConfig *)sbModel->Clone();
218  bModel->SetName(TString(modelSBName) + TString("B_only"));
219  RooRealVar *var = dynamic_cast<RooRealVar *>(bModel->GetParametersOfInterest()->first());
220  if (!var)
221  return;
222  double oldval = var->getVal();
223  var->setVal(0);
224  // bModel->SetSnapshot( RooArgSet(*var, *w->var("lumi")) );
225  bModel->SetSnapshot(RooArgSet(*var));
226  var->setVal(oldval);
227  }
228 
229  if (!sbModel->GetSnapshot() || poiValue > 0) {
230  Info("StandardHypoTestDemo", "Model %s has no snapshot - make one using model poi", modelSBName);
231  RooRealVar *var = dynamic_cast<RooRealVar *>(sbModel->GetParametersOfInterest()->first());
232  if (!var)
233  return;
234  double oldval = var->getVal();
235  if (poiValue > 0)
236  var->setVal(poiValue);
237  // sbModel->SetSnapshot( RooArgSet(*var, *w->var("lumi") ) );
238  sbModel->SetSnapshot(RooArgSet(*var));
239  if (poiValue > 0)
240  var->setVal(oldval);
241  // sbModel->SetSnapshot( *sbModel->GetParametersOfInterest() );
242  }
243 
244  // part 1, hypothesis testing
245  SimpleLikelihoodRatioTestStat *slrts = new SimpleLikelihoodRatioTestStat(*bModel->GetPdf(), *sbModel->GetPdf());
246  // null parameters must includes snapshot of poi plus the nuisance values
247  RooArgSet nullParams(*bModel->GetSnapshot());
248  if (bModel->GetNuisanceParameters())
249  nullParams.add(*bModel->GetNuisanceParameters());
250 
251  slrts->SetNullParameters(nullParams);
252  RooArgSet altParams(*sbModel->GetSnapshot());
253  if (sbModel->GetNuisanceParameters())
254  altParams.add(*sbModel->GetNuisanceParameters());
255  slrts->SetAltParameters(altParams);
256 
257  ProfileLikelihoodTestStat *profll = new ProfileLikelihoodTestStat(*bModel->GetPdf());
258 
259  RatioOfProfiledLikelihoodsTestStat *ropl =
260  new RatioOfProfiledLikelihoodsTestStat(*bModel->GetPdf(), *sbModel->GetPdf(), sbModel->GetSnapshot());
261  ropl->SetSubtractMLE(false);
262 
263  if (testStatType == 3)
264  profll->SetOneSidedDiscovery(1);
265  profll->SetPrintLevel(printLevel);
266 
267  if (enableDetOutput) {
268  slrts->EnableDetailedOutput();
269  profll->EnableDetailedOutput();
270  ropl->EnableDetailedOutput();
271  }
272 
273  /* profll.SetReuseNLL(mOptimize);*/
274  /* slrts.SetReuseNLL(mOptimize);*/
275  /* ropl.SetReuseNLL(mOptimize);*/
276 
277  AsymptoticCalculator::SetPrintLevel(printLevel);
278 
279  HypoTestCalculatorGeneric *hypoCalc = 0;
280  // note here Null is B and Alt is S+B
281  if (calcType == 0)
282  hypoCalc = new FrequentistCalculator(*data, *sbModel, *bModel);
283  else if (calcType == 1)
284  hypoCalc = new HybridCalculator(*data, *sbModel, *bModel);
285  else if (calcType == 2)
286  hypoCalc = new AsymptoticCalculator(*data, *sbModel, *bModel);
287 
288  if (calcType == 0) {
289  ((FrequentistCalculator *)hypoCalc)->SetToys(ntoys, ntoys / nToysRatio);
290  if (enableDetOutput)
291  ((FrequentistCalculator *)hypoCalc)->StoreFitInfo(true);
292  }
293  if (calcType == 1) {
294  ((HybridCalculator *)hypoCalc)->SetToys(ntoys, ntoys / nToysRatio);
295  // n. a. yetif (enableDetOutput) ((HybridCalculator*) hypoCalc)->StoreFitInfo(true);
296  }
297  if (calcType == 2) {
298  if (testStatType == 3)
299  ((AsymptoticCalculator *)hypoCalc)->SetOneSidedDiscovery(true);
300  if (testStatType != 2 && testStatType != 3)
301  Warning("StandardHypoTestDemo",
302  "Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");
303  }
304 
305  // check for nuisance prior pdf in case of nuisance parameters
306  if (calcType == 1 && (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters())) {
307  RooAbsPdf *nuisPdf = 0;
308  if (nuisPriorName)
309  nuisPdf = w->pdf(nuisPriorName);
310  // use prior defined first in bModel (then in SbModel)
311  if (!nuisPdf) {
312  Info("StandardHypoTestDemo",
313  "No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model");
314  if (bModel->GetPdf() && bModel->GetObservables())
315  nuisPdf = RooStats::MakeNuisancePdf(*bModel, "nuisancePdf_bmodel");
316  else
317  nuisPdf = RooStats::MakeNuisancePdf(*sbModel, "nuisancePdf_sbmodel");
318  }
319  if (!nuisPdf) {
320  if (bModel->GetPriorPdf()) {
321  nuisPdf = bModel->GetPriorPdf();
322  Info("StandardHypoTestDemo",
323  "No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",
324  nuisPdf->GetName());
325  } else {
326  Error("StandardHypoTestDemo", "Cannot run Hybrid calculator because no prior on the nuisance parameter is "
327  "specified or can be derived");
328  return;
329  }
330  }
331  assert(nuisPdf);
332  Info("StandardHypoTestDemo", "Using as nuisance Pdf ... ");
333  nuisPdf->Print();
334 
335  const RooArgSet *nuisParams =
336  (bModel->GetNuisanceParameters()) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
337  RooArgSet *np = nuisPdf->getObservables(*nuisParams);
338  if (np->getSize() == 0) {
339  Warning("StandardHypoTestDemo",
340  "Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
341  }
342  delete np;
343 
344  ((HybridCalculator *)hypoCalc)->ForcePriorNuisanceAlt(*nuisPdf);
345  ((HybridCalculator *)hypoCalc)->ForcePriorNuisanceNull(*nuisPdf);
346  }
347 
348  /* hypoCalc->ForcePriorNuisanceAlt(*sbModel->GetPriorPdf());*/
349  /* hypoCalc->ForcePriorNuisanceNull(*bModel->GetPriorPdf());*/
350 
351  ToyMCSampler *sampler = (ToyMCSampler *)hypoCalc->GetTestStatSampler();
352 
353  if (sampler && (calcType == 0 || calcType == 1)) {
354 
355  // look if pdf is number counting or extended
356  if (sbModel->GetPdf()->canBeExtended()) {
357  if (useNC)
358  Warning("StandardHypoTestDemo", "Pdf is extended: but number counting flag is set: ignore it ");
359  } else {
360  // for not extended pdf
361  if (!useNC) {
362  int nEvents = data->numEntries();
363  Info("StandardHypoTestDemo",
364  "Pdf is not extended: number of events to generate taken from observed data set is %d", nEvents);
365  sampler->SetNEventsPerToy(nEvents);
366  } else {
367  Info("StandardHypoTestDemo", "using a number counting pdf");
368  sampler->SetNEventsPerToy(1);
369  }
370  }
371 
372  if (data->isWeighted() && !generateBinned) {
373  Info("StandardHypoTestDemo", "Data set is weighted, nentries = %d and sum of weights = %8.1f but toy "
374  "generation is unbinned - it would be faster to set generateBinned to true\n",
375  data->numEntries(), data->sumEntries());
376  }
377  if (generateBinned)
378  sampler->SetGenerateBinned(generateBinned);
379 
380  // use PROOF
381  if (useProof) {
382  ProofConfig pc(*w, 0, "", kFALSE);
383  sampler->SetProofConfig(&pc); // enable proof
384  }
385 
386  // set the test statistic
387  if (testStatType == 0)
388  sampler->SetTestStatistic(slrts);
389  if (testStatType == 1)
390  sampler->SetTestStatistic(ropl);
391  if (testStatType == 2 || testStatType == 3)
392  sampler->SetTestStatistic(profll);
393  }
394 
395  HypoTestResult *htr = hypoCalc->GetHypoTest();
396  htr->SetPValueIsRightTail(true);
397  htr->SetBackgroundAsAlt(false);
398  htr->Print(); // how to get meaningful CLs at this point?
399 
400  delete sampler;
401  delete slrts;
402  delete ropl;
403  delete profll;
404 
405  if (calcType != 2) {
406  HypoTestPlot *plot = new HypoTestPlot(*htr, 100);
407  plot->SetLogYaxis(true);
408  plot->Draw();
409  } else {
410  std::cout << "Asymptotic results " << std::endl;
411  }
412 
413  // look at expected significances
414  // found median of S+B distribution
415  if (calcType != 2) {
416 
417  SamplingDistribution *altDist = htr->GetAltDistribution();
418  HypoTestResult htExp("Expected Result");
419  htExp.Append(htr);
420  // find quantiles in alt (S+B) distribution
421  double p[5];
422  double q[5];
423  for (int i = 0; i < 5; ++i) {
424  double sig = -2 + i;
425  p[i] = ROOT::Math::normal_cdf(sig, 1);
426  }
427  std::vector<double> values = altDist->GetSamplingDistribution();
428  TMath::Quantiles(values.size(), 5, &values[0], q, p, false);
429 
430  for (int i = 0; i < 5; ++i) {
431  htExp.SetTestStatisticData(q[i]);
432  double sig = -2 + i;
433  std::cout << " Expected p -value and significance at " << sig << " sigma = " << htExp.NullPValue()
434  << " significance " << htExp.Significance() << " sigma " << std::endl;
435  }
436  } else {
437  // case of asymptotic calculator
438  for (int i = 0; i < 5; ++i) {
439  double sig = -2 + i;
440  // sigma is inverted here
441  double pval = AsymptoticCalculator::GetExpectedPValues(htr->NullPValue(), htr->AlternatePValue(), -sig, false);
442  std::cout << " Expected p -value and significance at " << sig << " sigma = " << pval << " significance "
443  << ROOT::Math::normal_quantile_c(pval, 1) << " sigma " << std::endl;
444  }
445  }
446 
447  // write result in a file in case of toys
448  bool writeResult = (calcType != 2);
449 
450  if (enableDetOutput) {
451  writeResult = true;
452  Info("StandardHypoTestDemo", "Detailed output will be written in output result file");
453  }
454 
455  if (htr != NULL && writeResult) {
456 
457  // write to a file the results
458  const char *calcTypeName = (calcType == 0) ? "Freq" : (calcType == 1) ? "Hybr" : "Asym";
459  TString resultFileName = TString::Format("%s_HypoTest_ts%d_", calcTypeName, testStatType);
460  // strip the / from the filename
461 
462  TString name = infile;
463  name.Replace(0, name.Last('/') + 1, "");
464  resultFileName += name;
465 
466  TFile *fileOut = new TFile(resultFileName, "RECREATE");
467  htr->Write();
468  Info("StandardHypoTestDemo", "HypoTestResult has been written in the file %s", resultFileName.Data());
469 
470  fileOut->Close();
471  }
472 }