Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
StandardHypoTestInvDemo.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roostats
3 /// \notebook
4 /// Standard tutorial macro for performing an inverted hypothesis test for computing an interval
5 ///
6 /// This macro will perform a scan of the p-values for computing the interval or limit
7 ///
8 /// Usage:
9 ///
10 /// ~~~{.cpp}
11 /// root>.L StandardHypoTestInvDemo.C
12 /// root> StandardHypoTestInvDemo("fileName","workspace name","S+B modelconfig name","B model name","data set
13 /// name",calculator type, test statistic type, use CLS,
14 /// number of points, xmin, xmax, number of toys, use number counting)
15 ///
16 /// type = 0 Freq calculator
17 /// type = 1 Hybrid calculator
18 /// type = 2 Asymptotic calculator
19 /// type = 3 Asymptotic calculator using nominal Asimov data sets (not using fitted parameter values but nominal ones)
20 ///
21 /// testStatType = 0 LEP
22 /// = 1 Tevatron
23 /// = 2 Profile Likelihood two sided
24 /// = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
25 /// = 4 Profile Likelihood signed ( pll = -pll if mu < mu_hat)
26 /// = 5 Max Likelihood Estimate as test statistic
27 /// = 6 Number of observed event as test statistic
28 /// ~~~
29 ///
30 /// \macro_image
31 /// \macro_output
32 /// \macro_code
33 ///
34 /// \author Lorenzo Moneta
35 
36 #include "TFile.h"
37 #include "RooWorkspace.h"
38 #include "RooAbsPdf.h"
39 #include "RooRealVar.h"
40 #include "RooDataSet.h"
41 #include "RooStats/ModelConfig.h"
42 #include "RooRandom.h"
43 #include "TGraphErrors.h"
44 #include "TGraphAsymmErrors.h"
45 #include "TCanvas.h"
46 #include "TLine.h"
47 #include "TROOT.h"
48 #include "TSystem.h"
49 
53 #include "RooStats/ToyMCSampler.h"
54 #include "RooStats/HypoTestPlot.h"
55 
62 
66 
67 #include <cassert>
68 
69 using namespace RooFit;
70 using namespace RooStats;
71 using namespace std;
72 
73 // structure defining the options
74 struct HypoTestInvOptions {
75 
76  bool plotHypoTestResult = true; // plot test statistic result at each point
77  bool writeResult = true; // write HypoTestInverterResult in a file
78  TString resultFileName; // file with results (by default is built automatically using the workspace input file name)
79  bool optimize = true; // optimize evaluation of test statistic
80  bool useVectorStore = true; // convert data to use new roofit data store
81  bool generateBinned = false; // generate binned data sets
82  bool noSystematics = false; // force all systematics to be off (i.e. set all nuisance parameters as constat
83  // to their nominal values)
84  double nToysRatio = 2; // ratio Ntoys S+b/ntoysB
85  double maxPOI = -1; // max value used of POI (in case of auto scan)
86  bool useProof = false; // use Proof Lite when using toys (for freq or hybrid)
87  int nworkers = 0; // number of worker for ProofLite (default use all available cores)
88  bool enableDetailedOutput =
89  false; // enable detailed output with all fit information for each toys (output will be written in result file)
90  bool rebuild = false; // re-do extra toys for computing expected limits and rebuild test stat
91  // distributions (N.B this requires much more CPU (factor is equivalent to nToyToRebuild)
92  int nToyToRebuild = 100; // number of toys used to rebuild
93  int rebuildParamValues = 0; // = 0 do a profile of all the parameters on the B (alt snapshot) before performing a
94  // rebuild operation (default)
95  // = 1 use initial workspace parameters with B snapshot values
96  // = 2 use all initial workspace parameters with B
97  // Otherwise the rebuild will be performed using
98  int initialFit = -1; // do a first fit to the model (-1 : default, 0 skip fit, 1 do always fit)
99  int randomSeed = -1; // random seed (if = -1: use default value, if = 0 always random )
100  // NOTE: Proof uses automatically a random seed
101 
102  int nAsimovBins = 0; // number of bins in observables used for Asimov data sets (0 is the default and it is given by
103  // workspace, typically is 100)
104 
105  bool reuseAltToys = false; // reuse same toys for alternate hypothesis (if set one gets more stable bands)
106  double confLevel = 0.95; // confidence level value
107 
108  std::string minimizerType =
109  ""; // minimizer type (default is what is in ROOT::Math::MinimizerOptions::DefaultMinimizerType()
110  std::string massValue = ""; // extra string to tag output file of result
111  int printLevel = 0; // print level for debugging PL test statistics and calculators
112 
113  bool useNLLOffset = false; // use NLL offset when fitting (this increase stability of fits)
114 };
115 
116 HypoTestInvOptions optHTInv;
117 
118 // internal class to run the inverter and more
119 
120 namespace RooStats {
121 
122 class HypoTestInvTool {
123 
124 public:
125  HypoTestInvTool();
126  ~HypoTestInvTool(){};
127 
128  HypoTestInverterResult *RunInverter(RooWorkspace *w, const char *modelSBName, const char *modelBName,
129  const char *dataName, int type, int testStatType, bool useCLs, int npoints,
130  double poimin, double poimax, int ntoys, bool useNumberCounting = false,
131  const char *nuisPriorName = 0);
132 
133  void AnalyzeResult(HypoTestInverterResult *r, int calculatorType, int testStatType, bool useCLs, int npoints,
134  const char *fileNameBase = 0);
135 
136  void SetParameter(const char *name, const char *value);
137  void SetParameter(const char *name, bool value);
138  void SetParameter(const char *name, int value);
139  void SetParameter(const char *name, double value);
140 
141 private:
142  bool mPlotHypoTestResult;
143  bool mWriteResult;
144  bool mOptimize;
145  bool mUseVectorStore;
146  bool mGenerateBinned;
147  bool mUseProof;
148  bool mRebuild;
149  bool mReuseAltToys;
150  bool mEnableDetOutput;
151  int mNWorkers;
152  int mNToyToRebuild;
153  int mRebuildParamValues;
154  int mPrintLevel;
155  int mInitialFit;
156  int mRandomSeed;
157  double mNToysRatio;
158  double mMaxPoi;
159  int mAsimovBins;
160  std::string mMassValue;
161  std::string
162  mMinimizerType; // minimizer type (default is what is in ROOT::Math::MinimizerOptions::DefaultMinimizerType()
163  TString mResultFileName;
164 };
165 
166 } // end namespace RooStats
167 
168 RooStats::HypoTestInvTool::HypoTestInvTool()
169  : mPlotHypoTestResult(true), mWriteResult(false), mOptimize(true), mUseVectorStore(true), mGenerateBinned(false),
170  mUseProof(false), mEnableDetOutput(false), mRebuild(false), mReuseAltToys(false), mNWorkers(4),
171  mNToyToRebuild(100), mRebuildParamValues(0), mPrintLevel(0), mInitialFit(-1), mRandomSeed(-1), mNToysRatio(2),
172  mMaxPoi(-1), mAsimovBins(0), mMassValue(""), mMinimizerType(""), mResultFileName()
173 {
174 }
175 
176 void RooStats::HypoTestInvTool::SetParameter(const char *name, bool value)
177 {
178  //
179  // set boolean parameters
180  //
181 
182  std::string s_name(name);
183 
184  if (s_name.find("PlotHypoTestResult") != std::string::npos)
185  mPlotHypoTestResult = value;
186  if (s_name.find("WriteResult") != std::string::npos)
187  mWriteResult = value;
188  if (s_name.find("Optimize") != std::string::npos)
189  mOptimize = value;
190  if (s_name.find("UseVectorStore") != std::string::npos)
191  mUseVectorStore = value;
192  if (s_name.find("GenerateBinned") != std::string::npos)
193  mGenerateBinned = value;
194  if (s_name.find("UseProof") != std::string::npos)
195  mUseProof = value;
196  if (s_name.find("EnableDetailedOutput") != std::string::npos)
197  mEnableDetOutput = value;
198  if (s_name.find("Rebuild") != std::string::npos)
199  mRebuild = value;
200  if (s_name.find("ReuseAltToys") != std::string::npos)
201  mReuseAltToys = value;
202 
203  return;
204 }
205 
206 void RooStats::HypoTestInvTool::SetParameter(const char *name, int value)
207 {
208  //
209  // set integer parameters
210  //
211 
212  std::string s_name(name);
213 
214  if (s_name.find("NWorkers") != std::string::npos)
215  mNWorkers = value;
216  if (s_name.find("NToyToRebuild") != std::string::npos)
217  mNToyToRebuild = value;
218  if (s_name.find("RebuildParamValues") != std::string::npos)
219  mRebuildParamValues = value;
220  if (s_name.find("PrintLevel") != std::string::npos)
221  mPrintLevel = value;
222  if (s_name.find("InitialFit") != std::string::npos)
223  mInitialFit = value;
224  if (s_name.find("RandomSeed") != std::string::npos)
225  mRandomSeed = value;
226  if (s_name.find("AsimovBins") != std::string::npos)
227  mAsimovBins = value;
228 
229  return;
230 }
231 
232 void RooStats::HypoTestInvTool::SetParameter(const char *name, double value)
233 {
234  //
235  // set double precision parameters
236  //
237 
238  std::string s_name(name);
239 
240  if (s_name.find("NToysRatio") != std::string::npos)
241  mNToysRatio = value;
242  if (s_name.find("MaxPOI") != std::string::npos)
243  mMaxPoi = value;
244 
245  return;
246 }
247 
248 void RooStats::HypoTestInvTool::SetParameter(const char *name, const char *value)
249 {
250  //
251  // set string parameters
252  //
253 
254  std::string s_name(name);
255 
256  if (s_name.find("MassValue") != std::string::npos)
257  mMassValue.assign(value);
258  if (s_name.find("MinimizerType") != std::string::npos)
259  mMinimizerType.assign(value);
260  if (s_name.find("ResultFileName") != std::string::npos)
261  mResultFileName = value;
262 
263  return;
264 }
265 
266 void StandardHypoTestInvDemo(const char *infile = 0, const char *wsName = "combined",
267  const char *modelSBName = "ModelConfig", const char *modelBName = "",
268  const char *dataName = "obsData", int calculatorType = 0, int testStatType = 0,
269  bool useCLs = true, int npoints = 6, double poimin = 0, double poimax = 5,
270  int ntoys = 1000, bool useNumberCounting = false, const char *nuisPriorName = 0)
271 {
272  /*
273 
274  Other Parameter to pass in tutorial
275  apart from standard for filename, ws, modelconfig and data
276 
277  type = 0 Freq calculator
278  type = 1 Hybrid calculator
279  type = 2 Asymptotic calculator
280  type = 3 Asymptotic calculator using nominal Asimov data sets (not using fitted parameter values but nominal ones)
281 
282  testStatType = 0 LEP
283  = 1 Tevatron
284  = 2 Profile Likelihood
285  = 3 Profile Likelihood one sided (i.e. = 0 if mu < mu_hat)
286  = 4 Profiel Likelihood signed ( pll = -pll if mu < mu_hat)
287  = 5 Max Likelihood Estimate as test statistic
288  = 6 Number of observed event as test statistic
289 
290  useCLs scan for CLs (otherwise for CLs+b)
291 
292  npoints: number of points to scan , for autoscan set npoints = -1
293 
294  poimin,poimax: min/max value to scan in case of fixed scans
295  (if min > max, try to find automatically)
296 
297  ntoys: number of toys to use
298 
299  useNumberCounting: set to true when using number counting events
300 
301  nuisPriorName: name of prior for the nuisance. This is often expressed as constraint term in the global model
302  It is needed only when using the HybridCalculator (type=1)
303  If not given by default the prior pdf from ModelConfig is used.
304 
305  extra options are available as global parameters of the macro. They major ones are:
306 
307  plotHypoTestResult plot result of tests at each point (TS distributions) (default is true)
308  useProof use Proof (default is true)
309  writeResult write result of scan (default is true)
310  rebuild rebuild scan for expected limits (require extra toys) (default is false)
311  generateBinned generate binned data sets for toys (default is false) - be careful not to activate with
312  a too large (>=3) number of observables
313  nToyRatio ratio of S+B/B toys (default is 2)
314 
315 
316  */
317 
318  TString filename(infile);
319  if (filename.IsNull()) {
320  filename = "results/example_combined_GaussExample_model.root";
321  bool fileExist = !gSystem->AccessPathName(filename); // note opposite return code
322  // if file does not exists generate with histfactory
323  if (!fileExist) {
324 #ifdef _WIN32
325  cout << "HistFactory file cannot be generated on Windows - exit" << endl;
326  return;
327 #endif
328  // Normally this would be run on the command line
329  cout << "will run standard hist2workspace example" << endl;
330  gROOT->ProcessLine(".! prepareHistFactory .");
331  gROOT->ProcessLine(".! hist2workspace config/example.xml");
332  cout << "\n\n---------------------" << endl;
333  cout << "Done creating example input" << endl;
334  cout << "---------------------\n\n" << endl;
335  }
336 
337  } else
338  filename = infile;
339 
340  // Try to open the file
341  TFile *file = TFile::Open(filename);
342 
343  // if input file was specified byt not found, quit
344  if (!file) {
345  cout << "StandardRooStatsDemoMacro: Input file " << filename << " is not found" << endl;
346  return;
347  }
348 
349  HypoTestInvTool calc;
350 
351  // set parameters
352  calc.SetParameter("PlotHypoTestResult", optHTInv.plotHypoTestResult);
353  calc.SetParameter("WriteResult", optHTInv.writeResult);
354  calc.SetParameter("Optimize", optHTInv.optimize);
355  calc.SetParameter("UseVectorStore", optHTInv.useVectorStore);
356  calc.SetParameter("GenerateBinned", optHTInv.generateBinned);
357  calc.SetParameter("NToysRatio", optHTInv.nToysRatio);
358  calc.SetParameter("MaxPOI", optHTInv.maxPOI);
359  calc.SetParameter("UseProof", optHTInv.useProof);
360  calc.SetParameter("EnableDetailedOutput", optHTInv.enableDetailedOutput);
361  calc.SetParameter("NWorkers", optHTInv.nworkers);
362  calc.SetParameter("Rebuild", optHTInv.rebuild);
363  calc.SetParameter("ReuseAltToys", optHTInv.reuseAltToys);
364  calc.SetParameter("NToyToRebuild", optHTInv.nToyToRebuild);
365  calc.SetParameter("RebuildParamValues", optHTInv.rebuildParamValues);
366  calc.SetParameter("MassValue", optHTInv.massValue.c_str());
367  calc.SetParameter("MinimizerType", optHTInv.minimizerType.c_str());
368  calc.SetParameter("PrintLevel", optHTInv.printLevel);
369  calc.SetParameter("InitialFit", optHTInv.initialFit);
370  calc.SetParameter("ResultFileName", optHTInv.resultFileName);
371  calc.SetParameter("RandomSeed", optHTInv.randomSeed);
372  calc.SetParameter("AsimovBins", optHTInv.nAsimovBins);
373 
374  // enable offset for all roostats
375  if (optHTInv.useNLLOffset)
376  RooStats::UseNLLOffset(true);
377 
378  RooWorkspace *w = dynamic_cast<RooWorkspace *>(file->Get(wsName));
379  HypoTestInverterResult *r = 0;
380  std::cout << w << "\t" << filename << std::endl;
381  if (w != NULL) {
382  r = calc.RunInverter(w, modelSBName, modelBName, dataName, calculatorType, testStatType, useCLs, npoints, poimin,
383  poimax, ntoys, useNumberCounting, nuisPriorName);
384  if (!r) {
385  std::cerr << "Error running the HypoTestInverter - Exit " << std::endl;
386  return;
387  }
388  } else {
389  // case workspace is not present look for the inverter result
390  std::cout << "Reading an HypoTestInverterResult with name " << wsName << " from file " << filename << std::endl;
391  r = dynamic_cast<HypoTestInverterResult *>(file->Get(wsName)); //
392  if (!r) {
393  std::cerr << "File " << filename << " does not contain a workspace or an HypoTestInverterResult - Exit "
394  << std::endl;
395  file->ls();
396  return;
397  }
398  }
399 
400  calc.AnalyzeResult(r, calculatorType, testStatType, useCLs, npoints, infile);
401 
402  return;
403 }
404 
405 void RooStats::HypoTestInvTool::AnalyzeResult(HypoTestInverterResult *r, int calculatorType, int testStatType,
406  bool useCLs, int npoints, const char *fileNameBase)
407 {
408 
409  // analyze result produced by the inverter, optionally save it in a file
410 
411  double lowerLimit = 0;
412  double llError = 0;
413 #if defined ROOT_SVN_VERSION && ROOT_SVN_VERSION >= 44126
414  if (r->IsTwoSided()) {
415  lowerLimit = r->LowerLimit();
416  llError = r->LowerLimitEstimatedError();
417  }
418 #else
419  lowerLimit = r->LowerLimit();
420  llError = r->LowerLimitEstimatedError();
421 #endif
422 
423  double upperLimit = r->UpperLimit();
424  double ulError = r->UpperLimitEstimatedError();
425 
426  // std::cout << "DEBUG : [ " << lowerLimit << " , " << upperLimit << " ] " << std::endl;
427 
428  if (lowerLimit < upperLimit * (1. - 1.E-4) && lowerLimit != 0)
429  std::cout << "The computed lower limit is: " << lowerLimit << " +/- " << llError << std::endl;
430  std::cout << "The computed upper limit is: " << upperLimit << " +/- " << ulError << std::endl;
431 
432  // compute expected limit
433  std::cout << "Expected upper limits, using the B (alternate) model : " << std::endl;
434  std::cout << " expected limit (median) " << r->GetExpectedUpperLimit(0) << std::endl;
435  std::cout << " expected limit (-1 sig) " << r->GetExpectedUpperLimit(-1) << std::endl;
436  std::cout << " expected limit (+1 sig) " << r->GetExpectedUpperLimit(1) << std::endl;
437  std::cout << " expected limit (-2 sig) " << r->GetExpectedUpperLimit(-2) << std::endl;
438  std::cout << " expected limit (+2 sig) " << r->GetExpectedUpperLimit(2) << std::endl;
439 
440  // detailed output
441  if (mEnableDetOutput) {
442  mWriteResult = true;
443  Info("StandardHypoTestInvDemo", "detailed output will be written in output result file");
444  }
445 
446  // write result in a file
447  if (r != NULL && mWriteResult) {
448 
449  // write to a file the results
450  const char *calcType = (calculatorType == 0) ? "Freq" : (calculatorType == 1) ? "Hybr" : "Asym";
451  const char *limitType = (useCLs) ? "CLs" : "Cls+b";
452  const char *scanType = (npoints < 0) ? "auto" : "grid";
453  if (mResultFileName.IsNull()) {
454  mResultFileName = TString::Format("%s_%s_%s_ts%d_", calcType, limitType, scanType, testStatType);
455  // strip the / from the filename
456  if (mMassValue.size() > 0) {
457  mResultFileName += mMassValue.c_str();
458  mResultFileName += "_";
459  }
460 
461  TString name = fileNameBase;
462  name.Replace(0, name.Last('/') + 1, "");
463  mResultFileName += name;
464  }
465 
466  // get (if existing) rebuilt UL distribution
467  TString uldistFile = "RULDist.root";
468  TObject *ulDist = 0;
469  bool existULDist = !gSystem->AccessPathName(uldistFile);
470  if (existULDist) {
471  TFile *fileULDist = TFile::Open(uldistFile);
472  if (fileULDist)
473  ulDist = fileULDist->Get("RULDist");
474  }
475 
476  TFile *fileOut = new TFile(mResultFileName, "RECREATE");
477  r->Write();
478  if (ulDist)
479  ulDist->Write();
480  Info("StandardHypoTestInvDemo", "HypoTestInverterResult has been written in the file %s", mResultFileName.Data());
481 
482  fileOut->Close();
483  }
484 
485  // plot the result ( p values vs scan points)
486  std::string typeName = "";
487  if (calculatorType == 0)
488  typeName = "Frequentist";
489  if (calculatorType == 1)
490  typeName = "Hybrid";
491  else if (calculatorType == 2 || calculatorType == 3) {
492  typeName = "Asymptotic";
493  mPlotHypoTestResult = false;
494  }
495 
496  const char *resultName = r->GetName();
497  TString plotTitle = TString::Format("%s CL Scan for workspace %s", typeName.c_str(), resultName);
498  HypoTestInverterPlot *plot = new HypoTestInverterPlot("HTI_Result_Plot", plotTitle, r);
499 
500  // plot in a new canvas with style
501  TString c1Name = TString::Format("%s_Scan", typeName.c_str());
502  TCanvas *c1 = new TCanvas(c1Name);
503  c1->SetLogy(false);
504 
505  plot->Draw("CLb 2CL"); // plot all and Clb
506 
507  // if (useCLs)
508  // plot->Draw("CLb 2CL"); // plot all and Clb
509  // else
510  // plot->Draw(""); // plot all and Clb
511 
512  const int nEntries = r->ArraySize();
513 
514  // plot test statistics distributions for the two hypothesis
515  if (mPlotHypoTestResult) {
516  TCanvas *c2 = new TCanvas("c2");
517  if (nEntries > 1) {
518  int ny = TMath::CeilNint(TMath::Sqrt(nEntries));
519  int nx = TMath::CeilNint(double(nEntries) / ny);
520  c2->Divide(nx, ny);
521  }
522  for (int i = 0; i < nEntries; i++) {
523  if (nEntries > 1)
524  c2->cd(i + 1);
525  SamplingDistPlot *pl = plot->MakeTestStatPlot(i);
526  pl->SetLogYaxis(true);
527  pl->Draw();
528  }
529  }
530  gPad = c1;
531 }
532 
533 // internal routine to run the inverter
534 HypoTestInverterResult *RooStats::HypoTestInvTool::RunInverter(RooWorkspace *w, const char *modelSBName,
535  const char *modelBName, const char *dataName, int type,
536  int testStatType, bool useCLs, int npoints,
537  double poimin, double poimax, int ntoys,
538  bool useNumberCounting, const char *nuisPriorName)
539 {
540 
541  std::cout << "Running HypoTestInverter on the workspace " << w->GetName() << std::endl;
542 
543  w->Print();
544 
545  RooAbsData *data = w->data(dataName);
546  if (!data) {
547  Error("StandardHypoTestDemo", "Not existing data %s", dataName);
548  return 0;
549  } else
550  std::cout << "Using data set " << dataName << std::endl;
551 
552  if (mUseVectorStore) {
553  RooAbsData::setDefaultStorageType(RooAbsData::Vector);
554  data->convertToVectorStore();
555  }
556 
557  // get models from WS
558  // get the modelConfig out of the file
559  ModelConfig *bModel = (ModelConfig *)w->obj(modelBName);
560  ModelConfig *sbModel = (ModelConfig *)w->obj(modelSBName);
561 
562  if (!sbModel) {
563  Error("StandardHypoTestDemo", "Not existing ModelConfig %s", modelSBName);
564  return 0;
565  }
566  // check the model
567  if (!sbModel->GetPdf()) {
568  Error("StandardHypoTestDemo", "Model %s has no pdf ", modelSBName);
569  return 0;
570  }
571  if (!sbModel->GetParametersOfInterest()) {
572  Error("StandardHypoTestDemo", "Model %s has no poi ", modelSBName);
573  return 0;
574  }
575  if (!sbModel->GetObservables()) {
576  Error("StandardHypoTestInvDemo", "Model %s has no observables ", modelSBName);
577  return 0;
578  }
579  if (!sbModel->GetSnapshot()) {
580  Info("StandardHypoTestInvDemo", "Model %s has no snapshot - make one using model poi", modelSBName);
581  sbModel->SetSnapshot(*sbModel->GetParametersOfInterest());
582  }
583 
584  // case of no systematics
585  // remove nuisance parameters from model
586  if (optHTInv.noSystematics) {
587  const RooArgSet *nuisPar = sbModel->GetNuisanceParameters();
588  if (nuisPar && nuisPar->getSize() > 0) {
589  std::cout << "StandardHypoTestInvDemo"
590  << " - Switch off all systematics by setting them constant to their initial values" << std::endl;
591  RooStats::SetAllConstant(*nuisPar);
592  }
593  if (bModel) {
594  const RooArgSet *bnuisPar = bModel->GetNuisanceParameters();
595  if (bnuisPar)
596  RooStats::SetAllConstant(*bnuisPar);
597  }
598  }
599 
600  if (!bModel || bModel == sbModel) {
601  Info("StandardHypoTestInvDemo", "The background model %s does not exist", modelBName);
602  Info("StandardHypoTestInvDemo", "Copy it from ModelConfig %s and set POI to zero", modelSBName);
603  bModel = (ModelConfig *)sbModel->Clone();
604  bModel->SetName(TString(modelSBName) + TString("_with_poi_0"));
605  RooRealVar *var = dynamic_cast<RooRealVar *>(bModel->GetParametersOfInterest()->first());
606  if (!var)
607  return 0;
608  double oldval = var->getVal();
609  var->setVal(0);
610  bModel->SetSnapshot(RooArgSet(*var));
611  var->setVal(oldval);
612  } else {
613  if (!bModel->GetSnapshot()) {
614  Info("StandardHypoTestInvDemo", "Model %s has no snapshot - make one using model poi and 0 values ",
615  modelBName);
616  RooRealVar *var = dynamic_cast<RooRealVar *>(bModel->GetParametersOfInterest()->first());
617  if (var) {
618  double oldval = var->getVal();
619  var->setVal(0);
620  bModel->SetSnapshot(RooArgSet(*var));
621  var->setVal(oldval);
622  } else {
623  Error("StandardHypoTestInvDemo", "Model %s has no valid poi", modelBName);
624  return 0;
625  }
626  }
627  }
628 
629  // check model has global observables when there are nuisance pdf
630  // for the hybrid case the globals are not needed
631  if (type != 1) {
632  bool hasNuisParam = (sbModel->GetNuisanceParameters() && sbModel->GetNuisanceParameters()->getSize() > 0);
633  bool hasGlobalObs = (sbModel->GetGlobalObservables() && sbModel->GetGlobalObservables()->getSize() > 0);
634  if (hasNuisParam && !hasGlobalObs) {
635  // try to see if model has nuisance parameters first
636  RooAbsPdf *constrPdf = RooStats::MakeNuisancePdf(*sbModel, "nuisanceConstraintPdf_sbmodel");
637  if (constrPdf) {
638  Warning("StandardHypoTestInvDemo", "Model %s has nuisance parameters but no global observables associated",
639  sbModel->GetName());
640  Warning("StandardHypoTestInvDemo",
641  "\tThe effect of the nuisance parameters will not be treated correctly ");
642  }
643  }
644  }
645 
646  // save all initial parameters of the model including the global observables
647  RooArgSet initialParameters;
648  RooArgSet *allParams = sbModel->GetPdf()->getParameters(*data);
649  allParams->snapshot(initialParameters);
650  delete allParams;
651 
652  // run first a data fit
653 
654  const RooArgSet *poiSet = sbModel->GetParametersOfInterest();
655  RooRealVar *poi = (RooRealVar *)poiSet->first();
656 
657  std::cout << "StandardHypoTestInvDemo : POI initial value: " << poi->GetName() << " = " << poi->getVal()
658  << std::endl;
659 
660  // fit the data first (need to use constraint )
661  TStopwatch tw;
662 
663  bool doFit = mInitialFit;
664  if (testStatType == 0 && mInitialFit == -1)
665  doFit = false; // case of LEP test statistic
666  if (type == 3 && mInitialFit == -1)
667  doFit = false; // case of Asymptoticcalculator with nominal Asimov
668  double poihat = 0;
669 
670  if (mMinimizerType.size() == 0)
671  mMinimizerType = ROOT::Math::MinimizerOptions::DefaultMinimizerType();
672  else
673  ROOT::Math::MinimizerOptions::SetDefaultMinimizer(mMinimizerType.c_str());
674 
675  Info("StandardHypoTestInvDemo", "Using %s as minimizer for computing the test statistic",
676  ROOT::Math::MinimizerOptions::DefaultMinimizerType().c_str());
677 
678  if (doFit) {
679 
680  // do the fit : By doing a fit the POI snapshot (for S+B) is set to the fit value
681  // and the nuisance parameters nominal values will be set to the fit value.
682  // This is relevant when using LEP test statistics
683 
684  Info("StandardHypoTestInvDemo", " Doing a first fit to the observed data ");
685  RooArgSet constrainParams;
686  if (sbModel->GetNuisanceParameters())
687  constrainParams.add(*sbModel->GetNuisanceParameters());
688  RooStats::RemoveConstantParameters(&constrainParams);
689  tw.Start();
690  RooFitResult *fitres = sbModel->GetPdf()->fitTo(
691  *data, InitialHesse(false), Hesse(false), Minimizer(mMinimizerType.c_str(), "Migrad"), Strategy(0),
692  PrintLevel(mPrintLevel), Constrain(constrainParams), Save(true), Offset(RooStats::IsNLLOffset()));
693  if (fitres->status() != 0) {
694  Warning("StandardHypoTestInvDemo",
695  "Fit to the model failed - try with strategy 1 and perform first an Hesse computation");
696  fitres = sbModel->GetPdf()->fitTo(
697  *data, InitialHesse(true), Hesse(false), Minimizer(mMinimizerType.c_str(), "Migrad"), Strategy(1),
698  PrintLevel(mPrintLevel + 1), Constrain(constrainParams), Save(true), Offset(RooStats::IsNLLOffset()));
699  }
700  if (fitres->status() != 0)
701  Warning("StandardHypoTestInvDemo", " Fit still failed - continue anyway.....");
702 
703  poihat = poi->getVal();
704  std::cout << "StandardHypoTestInvDemo - Best Fit value : " << poi->GetName() << " = " << poihat << " +/- "
705  << poi->getError() << std::endl;
706  std::cout << "Time for fitting : ";
707  tw.Print();
708 
709  // save best fit value in the poi snapshot
710  sbModel->SetSnapshot(*sbModel->GetParametersOfInterest());
711  std::cout << "StandardHypoTestInvo: snapshot of S+B Model " << sbModel->GetName()
712  << " is set to the best fit value" << std::endl;
713  }
714 
715  // print a message in case of LEP test statistics because it affects result by doing or not doing a fit
716  if (testStatType == 0) {
717  if (!doFit)
718  Info("StandardHypoTestInvDemo", "Using LEP test statistic - an initial fit is not done and the TS will use "
719  "the nuisances at the model value");
720  else
721  Info("StandardHypoTestInvDemo", "Using LEP test statistic - an initial fit has been done and the TS will use "
722  "the nuisances at the best fit value");
723  }
724 
725  // build test statistics and hypotest calculators for running the inverter
726 
727  SimpleLikelihoodRatioTestStat slrts(*sbModel->GetPdf(), *bModel->GetPdf());
728 
729  // null parameters must includes snapshot of poi plus the nuisance values
730  RooArgSet nullParams(*sbModel->GetSnapshot());
731  if (sbModel->GetNuisanceParameters())
732  nullParams.add(*sbModel->GetNuisanceParameters());
733  if (sbModel->GetSnapshot())
734  slrts.SetNullParameters(nullParams);
735  RooArgSet altParams(*bModel->GetSnapshot());
736  if (bModel->GetNuisanceParameters())
737  altParams.add(*bModel->GetNuisanceParameters());
738  if (bModel->GetSnapshot())
739  slrts.SetAltParameters(altParams);
740  if (mEnableDetOutput)
741  slrts.EnableDetailedOutput();
742 
743  // ratio of profile likelihood - need to pass snapshot for the alt
744  RatioOfProfiledLikelihoodsTestStat ropl(*sbModel->GetPdf(), *bModel->GetPdf(), bModel->GetSnapshot());
745  ropl.SetSubtractMLE(false);
746  if (testStatType == 11)
747  ropl.SetSubtractMLE(true);
748  ropl.SetPrintLevel(mPrintLevel);
749  ropl.SetMinimizer(mMinimizerType.c_str());
750  if (mEnableDetOutput)
751  ropl.EnableDetailedOutput();
752 
753  ProfileLikelihoodTestStat profll(*sbModel->GetPdf());
754  if (testStatType == 3)
755  profll.SetOneSided(true);
756  if (testStatType == 4)
757  profll.SetSigned(true);
758  profll.SetMinimizer(mMinimizerType.c_str());
759  profll.SetPrintLevel(mPrintLevel);
760  if (mEnableDetOutput)
761  profll.EnableDetailedOutput();
762 
763  profll.SetReuseNLL(mOptimize);
764  slrts.SetReuseNLL(mOptimize);
765  ropl.SetReuseNLL(mOptimize);
766 
767  if (mOptimize) {
768  profll.SetStrategy(0);
769  ropl.SetStrategy(0);
770  ROOT::Math::MinimizerOptions::SetDefaultStrategy(0);
771  }
772 
773  if (mMaxPoi > 0)
774  poi->setMax(mMaxPoi); // increase limit
775 
776  MaxLikelihoodEstimateTestStat maxll(*sbModel->GetPdf(), *poi);
777  NumEventsTestStat nevtts;
778 
779  AsymptoticCalculator::SetPrintLevel(mPrintLevel);
780 
781  // create the HypoTest calculator class
782  HypoTestCalculatorGeneric *hc = 0;
783  if (type == 0)
784  hc = new FrequentistCalculator(*data, *bModel, *sbModel);
785  else if (type == 1)
786  hc = new HybridCalculator(*data, *bModel, *sbModel);
787  // else if (type == 2 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, false, mAsimovBins);
788  // else if (type == 3 ) hc = new AsymptoticCalculator(*data, *bModel, *sbModel, true, mAsimovBins); // for using
789  // Asimov data generated with nominal values
790  else if (type == 2)
791  hc = new AsymptoticCalculator(*data, *bModel, *sbModel, false);
792  else if (type == 3)
793  hc = new AsymptoticCalculator(*data, *bModel, *sbModel,
794  true); // for using Asimov data generated with nominal values
795  else {
796  Error("StandardHypoTestInvDemo", "Invalid - calculator type = %d supported values are only :\n\t\t\t 0 "
797  "(Frequentist) , 1 (Hybrid) , 2 (Asymptotic) ",
798  type);
799  return 0;
800  }
801 
802  // set the test statistic
803  TestStatistic *testStat = 0;
804  if (testStatType == 0)
805  testStat = &slrts;
806  if (testStatType == 1 || testStatType == 11)
807  testStat = &ropl;
808  if (testStatType == 2 || testStatType == 3 || testStatType == 4)
809  testStat = &profll;
810  if (testStatType == 5)
811  testStat = &maxll;
812  if (testStatType == 6)
813  testStat = &nevtts;
814 
815  if (testStat == 0) {
816  Error("StandardHypoTestInvDemo", "Invalid - test statistic type = %d supported values are only :\n\t\t\t 0 (SLR) "
817  ", 1 (Tevatron) , 2 (PLR), 3 (PLR1), 4(MLE)",
818  testStatType);
819  return 0;
820  }
821 
822  ToyMCSampler *toymcs = (ToyMCSampler *)hc->GetTestStatSampler();
823  if (toymcs && (type == 0 || type == 1)) {
824  // look if pdf is number counting or extended
825  if (sbModel->GetPdf()->canBeExtended()) {
826  if (useNumberCounting)
827  Warning("StandardHypoTestInvDemo", "Pdf is extended: but number counting flag is set: ignore it ");
828  } else {
829  // for not extended pdf
830  if (!useNumberCounting) {
831  int nEvents = data->numEntries();
832  Info("StandardHypoTestInvDemo",
833  "Pdf is not extended: number of events to generate taken from observed data set is %d", nEvents);
834  toymcs->SetNEventsPerToy(nEvents);
835  } else {
836  Info("StandardHypoTestInvDemo", "using a number counting pdf");
837  toymcs->SetNEventsPerToy(1);
838  }
839  }
840 
841  toymcs->SetTestStatistic(testStat);
842 
843  if (data->isWeighted() && !mGenerateBinned) {
844  Info("StandardHypoTestInvDemo", "Data set is weighted, nentries = %d and sum of weights = %8.1f but toy "
845  "generation is unbinned - it would be faster to set mGenerateBinned to true\n",
846  data->numEntries(), data->sumEntries());
847  }
848  toymcs->SetGenerateBinned(mGenerateBinned);
849 
850  toymcs->SetUseMultiGen(mOptimize);
851 
852  if (mGenerateBinned && sbModel->GetObservables()->getSize() > 2) {
853  Warning("StandardHypoTestInvDemo", "generate binned is activated but the number of observable is %d. Too much "
854  "memory could be needed for allocating all the bins",
855  sbModel->GetObservables()->getSize());
856  }
857 
858  // set the random seed if needed
859  if (mRandomSeed >= 0)
860  RooRandom::randomGenerator()->SetSeed(mRandomSeed);
861  }
862 
863  // specify if need to re-use same toys
864  if (mReuseAltToys) {
865  hc->UseSameAltToys();
866  }
867 
868  if (type == 1) {
869  HybridCalculator *hhc = dynamic_cast<HybridCalculator *>(hc);
870  assert(hhc);
871 
872  hhc->SetToys(ntoys, ntoys / mNToysRatio); // can use less ntoys for b hypothesis
873 
874  // remove global observables from ModelConfig (this is probably not needed anymore in 5.32)
875  bModel->SetGlobalObservables(RooArgSet());
876  sbModel->SetGlobalObservables(RooArgSet());
877 
878  // check for nuisance prior pdf in case of nuisance parameters
879  if (bModel->GetNuisanceParameters() || sbModel->GetNuisanceParameters()) {
880 
881  // fix for using multigen (does not work in this case)
882  toymcs->SetUseMultiGen(false);
883  ToyMCSampler::SetAlwaysUseMultiGen(false);
884 
885  RooAbsPdf *nuisPdf = 0;
886  if (nuisPriorName)
887  nuisPdf = w->pdf(nuisPriorName);
888  // use prior defined first in bModel (then in SbModel)
889  if (!nuisPdf) {
890  Info("StandardHypoTestInvDemo",
891  "No nuisance pdf given for the HybridCalculator - try to deduce pdf from the model");
892  if (bModel->GetPdf() && bModel->GetObservables())
893  nuisPdf = RooStats::MakeNuisancePdf(*bModel, "nuisancePdf_bmodel");
894  else
895  nuisPdf = RooStats::MakeNuisancePdf(*sbModel, "nuisancePdf_sbmodel");
896  }
897  if (!nuisPdf) {
898  if (bModel->GetPriorPdf()) {
899  nuisPdf = bModel->GetPriorPdf();
900  Info("StandardHypoTestInvDemo",
901  "No nuisance pdf given - try to use %s that is defined as a prior pdf in the B model",
902  nuisPdf->GetName());
903  } else {
904  Error("StandardHypoTestInvDemo", "Cannot run Hybrid calculator because no prior on the nuisance "
905  "parameter is specified or can be derived");
906  return 0;
907  }
908  }
909  assert(nuisPdf);
910  Info("StandardHypoTestInvDemo", "Using as nuisance Pdf ... ");
911  nuisPdf->Print();
912 
913  const RooArgSet *nuisParams =
914  (bModel->GetNuisanceParameters()) ? bModel->GetNuisanceParameters() : sbModel->GetNuisanceParameters();
915  RooArgSet *np = nuisPdf->getObservables(*nuisParams);
916  if (np->getSize() == 0) {
917  Warning("StandardHypoTestInvDemo",
918  "Prior nuisance does not depend on nuisance parameters. They will be smeared in their full range");
919  }
920  delete np;
921 
922  hhc->ForcePriorNuisanceAlt(*nuisPdf);
923  hhc->ForcePriorNuisanceNull(*nuisPdf);
924  }
925  } else if (type == 2 || type == 3) {
926  if (testStatType == 3)
927  ((AsymptoticCalculator *)hc)->SetOneSided(true);
928  if (testStatType != 2 && testStatType != 3)
929  Warning("StandardHypoTestInvDemo",
930  "Only the PL test statistic can be used with AsymptoticCalculator - use by default a two-sided PL");
931  } else if (type == 0) {
932  ((FrequentistCalculator *)hc)->SetToys(ntoys, ntoys / mNToysRatio);
933  // store also the fit information for each poi point used by calculator based on toys
934  if (mEnableDetOutput)
935  ((FrequentistCalculator *)hc)->StoreFitInfo(true);
936  } else if (type == 1) {
937  ((HybridCalculator *)hc)->SetToys(ntoys, ntoys / mNToysRatio);
938  // store also the fit information for each poi point used by calculator based on toys
939  // if (mEnableDetOutput) ((HybridCalculator*) hc)->StoreFitInfo(true);
940  }
941 
942  // Get the result
943  RooMsgService::instance().getStream(1).removeTopic(RooFit::NumIntegration);
944 
945  HypoTestInverter calc(*hc);
946  calc.SetConfidenceLevel(optHTInv.confLevel);
947 
948  calc.UseCLs(useCLs);
949  calc.SetVerbose(true);
950 
951  // can speed up using proof-lite
952  if (mUseProof) {
953  ProofConfig pc(*w, mNWorkers, "", kFALSE);
954  toymcs->SetProofConfig(&pc); // enable proof
955  }
956 
957  if (npoints > 0) {
958  if (poimin > poimax) {
959  // if no min/max given scan between MLE and +4 sigma
960  poimin = int(poihat);
961  poimax = int(poihat + 4 * poi->getError());
962  }
963  std::cout << "Doing a fixed scan in interval : " << poimin << " , " << poimax << std::endl;
964  calc.SetFixedScan(npoints, poimin, poimax);
965  } else {
966  // poi->setMax(10*int( (poihat+ 10 *poi->getError() )/10 ) );
967  std::cout << "Doing an automatic scan in interval : " << poi->getMin() << " , " << poi->getMax() << std::endl;
968  }
969 
970  tw.Start();
971  HypoTestInverterResult *r = calc.GetInterval();
972  std::cout << "Time to perform limit scan \n";
973  tw.Print();
974 
975  if (mRebuild) {
976 
977  std::cout << "\n***************************************************************\n";
978  std::cout << "Rebuild the upper limit distribution by re-generating new set of pseudo-experiment and re-compute "
979  "for each of them a new upper limit\n\n";
980 
981  allParams = sbModel->GetPdf()->getParameters(*data);
982 
983  // define on which value of nuisance parameters to do the rebuild
984  // default is best fit value for bmodel snapshot
985 
986  if (mRebuildParamValues != 0) {
987  // set all parameters to their initial workspace values
988  *allParams = initialParameters;
989  }
990  if (mRebuildParamValues == 0 || mRebuildParamValues == 1) {
991  RooArgSet constrainParams;
992  if (sbModel->GetNuisanceParameters())
993  constrainParams.add(*sbModel->GetNuisanceParameters());
994  RooStats::RemoveConstantParameters(&constrainParams);
995 
996  const RooArgSet *poiModel = sbModel->GetParametersOfInterest();
997  bModel->LoadSnapshot();
998 
999  // do a profile using the B model snapshot
1000  if (mRebuildParamValues == 0) {
1001 
1002  RooStats::SetAllConstant(*poiModel, true);
1003 
1004  sbModel->GetPdf()->fitTo(*data, InitialHesse(false), Hesse(false),
1005  Minimizer(mMinimizerType.c_str(), "Migrad"), Strategy(0), PrintLevel(mPrintLevel),
1006  Constrain(constrainParams), Offset(RooStats::IsNLLOffset()));
1007 
1008  std::cout << "rebuild using fitted parameter value for B-model snapshot" << std::endl;
1009  constrainParams.Print("v");
1010 
1011  RooStats::SetAllConstant(*poiModel, false);
1012  }
1013  }
1014  std::cout << "StandardHypoTestInvDemo: Initial parameters used for rebuilding: ";
1015  RooStats::PrintListContent(*allParams, std::cout);
1016  delete allParams;
1017 
1018  calc.SetCloseProof(1);
1019  tw.Start();
1020  SamplingDistribution *limDist = calc.GetUpperLimitDistribution(true, mNToyToRebuild);
1021  std::cout << "Time to rebuild distributions " << std::endl;
1022  tw.Print();
1023 
1024  if (limDist) {
1025  std::cout << "Expected limits after rebuild distribution " << std::endl;
1026  std::cout << "expected upper limit (median of limit distribution) " << limDist->InverseCDF(0.5) << std::endl;
1027  std::cout << "expected -1 sig limit (0.16% quantile of limit dist) "
1028  << limDist->InverseCDF(ROOT::Math::normal_cdf(-1)) << std::endl;
1029  std::cout << "expected +1 sig limit (0.84% quantile of limit dist) "
1030  << limDist->InverseCDF(ROOT::Math::normal_cdf(1)) << std::endl;
1031  std::cout << "expected -2 sig limit (.025% quantile of limit dist) "
1032  << limDist->InverseCDF(ROOT::Math::normal_cdf(-2)) << std::endl;
1033  std::cout << "expected +2 sig limit (.975% quantile of limit dist) "
1034  << limDist->InverseCDF(ROOT::Math::normal_cdf(2)) << std::endl;
1035 
1036  // Plot the upper limit distribution
1037  SamplingDistPlot limPlot((mNToyToRebuild < 200) ? 50 : 100);
1038  limPlot.AddSamplingDistribution(limDist);
1039  limPlot.GetTH1F()->SetStats(true); // display statistics
1040  limPlot.SetLineColor(kBlue);
1041  new TCanvas("limPlot", "Upper Limit Distribution");
1042  limPlot.Draw();
1043 
1044  /// save result in a file
1045  limDist->SetName("RULDist");
1046  TFile *fileOut = new TFile("RULDist.root", "RECREATE");
1047  limDist->Write();
1048  fileOut->Close();
1049 
1050  // update r to a new updated result object containing the rebuilt expected p-values distributions
1051  // (it will not recompute the expected limit)
1052  if (r)
1053  delete r; // need to delete previous object since GetInterval will return a cloned copy
1054  r = calc.GetInterval();
1055 
1056  } else
1057  std::cout << "ERROR : failed to re-build distributions " << std::endl;
1058  }
1059 
1060  return r;
1061 }
1062 
1063 void ReadResult(const char *fileName, const char *resultName = "", bool useCLs = true)
1064 {
1065  // read a previous stored result from a file given the result name
1066 
1067  StandardHypoTestInvDemo(fileName, resultName, "", "", "", 0, 0, useCLs);
1068 }
1069 
1070 #ifdef USE_AS_MAIN
1071 int main()
1072 {
1073  StandardHypoTestInvDemo();
1074 }
1075 #endif