Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MakeModelAndMeasurementsFast.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id: cranmer $
2 // Author: Kyle Cranmer, Akira Shibata
3 /*************************************************************************
4  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 
12 
13 
14 // from std
15 #include <string>
16 #include <vector>
17 #include <map>
18 #include <iostream>
19 #include <sstream>
20 
21 // from root
22 #include "TFile.h"
23 #include "TH1F.h"
24 #include "TDOMParser.h"
25 #include "TXMLAttr.h"
26 #include "TString.h"
27 #include "TCanvas.h"
28 #include "TStyle.h"
29 #include "TLine.h"
30 #include "TSystem.h"
31 
32 
33 // from roofit
34 #include "RooStats/ModelConfig.h"
35 
36 // from this package
37 #include "Helper.h"
42 
44 
45 using namespace RooFit;
46 //using namespace RooStats;
47 //using namespace HistFactory;
48 
49 //using namespace std;
50 
51 
52 /** ********************************************************************************************
53  \ingroup HistFactory
54 
55  <p>
56  This is a package that creates a RooFit probability density function from ROOT histograms
57  of expected distributions and histograms that represent the +/- 1 sigma variations
58  from systematic effects. The resulting probability density function can then be used
59  with any of the statistical tools provided within RooStats, such as the profile
60  likelihood ratio, Feldman-Cousins, etc. In this version, the model is directly
61  fed to a likelihood ratio test, but it needs to be further factorized.</p>
62 
63  <p>
64  The user needs to provide histograms (in picobarns per bin) and configure the job
65  with XML. The configuration XML is defined in the file `$ROOTSYS/config/HistFactorySchema.dtd, but essentially
66  it is organized as follows (see the examples in `${ROOTSYS}/tutorials/histfactory/`)</p>
67 
68  <ul>
69  <li> a top level 'Combination' that is composed of:</li>
70  <ul>
71  <li> several 'Channels' (eg. ee, emu, mumu), which are composed of:</li>
72  <ul>
73  <li> several 'Samples' (eg. signal, bkg1, bkg2, ...), each of which has:</li>
74  <ul>
75  <li> a name</li>
76  <li> if the sample is normalized by theory (eg N = L*sigma) or not (eg. data driven)</li>
77  <li> a nominal expectation histogram</li>
78  <li> a named 'Normalization Factor' (which can be fixed or allowed to float in a fit)</li>
79  <li> several 'Overall Systematics' in normalization with:</li>
80  <ul>
81  <li> a name</li>
82  <li> +/- 1 sigma variations (eg. 1.05 and 0.95 for a 5% uncertainty)</li>
83  </ul>
84  <li> several 'Histogram Systematics' in shape with:</li>
85  <ul>
86  <li> a name (which can be shared with the OverallSyst if correlated)</li>
87  <li> +/- 1 sigma variational histograms</li>
88  </ul>
89  </ul>
90  </ul>
91  <li> several 'Measurements' (corresponding to a full fit of the model) each of which specifies</li>
92  <ul>
93  <li> a name for this fit to be used in tables and files</li>
94  <ul>
95  <li> what is the luminosity associated to the measurement in picobarns</li>
96  <li> which bins of the histogram should be used</li>
97  <li> what is the relative uncertainty on the luminosity </li>
98  <li> what is (are) the parameter(s) of interest that will be measured</li>
99  <li> which parameters should be fixed/floating (eg. nuisance parameters)</li>
100  </ul>
101  </ul>
102  </ul>
103 */
104 RooWorkspace* RooStats::HistFactory::MakeModelAndMeasurementFast( RooStats::HistFactory::Measurement& measurement ) {
105 
106  // This will be returned
107  RooWorkspace* ws = NULL;
108  TFile* outFile = NULL;
109  FILE* tableFile=NULL;
110 
111  try {
112 
113  std::cout << "Making Model and Measurements (Fast) for measurement: " << measurement.GetName() << std::endl;
114 
115  double lumiError = measurement.GetLumi()*measurement.GetLumiRelErr();
116 
117  std::cout << "using lumi = " << measurement.GetLumi() << " and lumiError = " << lumiError
118  << " including bins between " << measurement.GetBinLow() << " and " << measurement.GetBinHigh() << std::endl;
119  std::cout << "fixing the following parameters:" << std::endl;
120 
121  for(std::vector<std::string>::iterator itr=measurement.GetConstantParams().begin(); itr!=measurement.GetConstantParams().end(); ++itr){
122  std::cout << " " << *itr << std::endl;
123  }
124 
125  std::string rowTitle = measurement.GetName();
126 
127  std::vector<RooWorkspace*> channel_workspaces;
128  std::vector<std::string> channel_names;
129 
130  // Create the outFile - first check if the outputfile exists
131  std::string prefix = measurement.GetOutputFilePrefix();
132  // parse prefix to find output directory -
133  // assume there is a file prefix after the last "/" that we remove
134  // to get the directory name.
135  // We do by finding last occurrence of "/" and using as directory name what is before
136  // if we do not have a "/" in the prefix there is no output directory to be checked or created
137  size_t pos = prefix.rfind("/");
138  if (pos != std::string::npos) {
139  std::string outputDir = prefix.substr(0,pos);
140  std::cout << "Checking if output directory : " << outputDir << " - exists" << std::endl;
141  if (gSystem->OpenDirectory( outputDir.c_str() ) == 0 ) {
142  std::cout << "Output directory : " << outputDir << " - does not exist, try to create" << std::endl;
143  int success = gSystem->MakeDirectory( outputDir.c_str() );
144  if( success != 0 ) {
145  std::string fullOutputDir = std::string(gSystem->pwd()) + std::string("/") + outputDir;
146  std::cout << "Error: Failed to make output directory: " << fullOutputDir << std::endl;
147  throw hf_exc();
148  }
149  }
150  }
151 
152  // This holds the TGraphs that are created during the fit
153  std::string outputFileName = measurement.GetOutputFilePrefix() + "_" + measurement.GetName() + ".root";
154  std::cout << "Creating the output file: " << outputFileName << std::endl;
155  outFile = new TFile(outputFileName.c_str(), "recreate");
156 
157  // Create the table file
158  // This holds the table of fitted values and errors
159  std::string tableFileName = measurement.GetOutputFilePrefix() + "_results.table";
160  std::cout << "Creating the table file: " << tableFileName << std::endl;
161  tableFile = fopen( tableFileName.c_str(), "a");
162 
163  std::cout << "Creating the HistoToWorkspaceFactoryFast factory" << std::endl;
164  HistoToWorkspaceFactoryFast factory( measurement );
165 
166  // Make the factory, and do some preprocessing
167  // HistoToWorkspaceFactoryFast factory(measurement, rowTitle, outFile);
168  std::cout << "Setting preprocess functions" << std::endl;
169  factory.SetFunctionsToPreprocess( measurement.GetPreprocessFunctions() );
170 
171  // for results tables
172  fprintf(tableFile, " %s &", rowTitle.c_str() );
173 
174  // First: Loop to make the individual channels
175  for( unsigned int chanItr = 0; chanItr < measurement.GetChannels().size(); ++chanItr ) {
176 
177  HistFactory::Channel& channel = measurement.GetChannels().at( chanItr );
178  if( ! channel.CheckHistograms() ) {
179  std::cout << "MakeModelAndMeasurementsFast: Channel: " << channel.GetName()
180  << " has uninitialized histogram pointers" << std::endl;
181  throw hf_exc();
182  }
183 
184  // Make the workspace for this individual channel
185  std::string ch_name = channel.GetName();
186  std::cout << "Starting to process channel: " << ch_name << std::endl;
187  channel_names.push_back(ch_name);
188  RooWorkspace* ws_single = factory.MakeSingleChannelModel( measurement, channel );
189  channel_workspaces.push_back(ws_single);
190 
191  // Make the output
192  std::string ChannelFileName = measurement.GetOutputFilePrefix() + "_"
193  + ch_name + "_" + rowTitle + "_model.root";
194  ws_single->writeToFile( ChannelFileName.c_str() );
195 
196  // Now, write the measurement to the file
197  // Make a new measurement for only this channel
198  RooStats::HistFactory::Measurement meas_chan( measurement );
199  meas_chan.GetChannels().clear();
200  meas_chan.GetChannels().push_back( channel );
201  std::cout << "Opening File to hold channel: " << ChannelFileName << std::endl;
202  TFile* chanFile = TFile::Open( ChannelFileName.c_str(), "UPDATE" );
203  std::cout << "About to write channel measurement to file" << std::endl;
204  meas_chan.writeToFile( chanFile );
205  std::cout << "Successfully wrote channel to file" << std::endl;
206  chanFile->Close();
207 
208  // Get the Paramater of Interest as a RooRealVar
209  RooRealVar* poi = dynamic_cast<RooRealVar*>( ws_single->var( (measurement.GetPOI()).c_str() ) );
210 
211  // do fit unless exportOnly requested
212  if(! measurement.GetExportOnly()){
213  if(!poi) {
214  std::cout << "Can't do fit for: " << measurement.GetName()
215  << ", no parameter of interest" << std::endl;
216  } else {
217  if(ws_single->data("obsData")) {
218  FitModelAndPlot(measurement.GetName(), measurement.GetOutputFilePrefix(), ws_single,
219  ch_name, "obsData", outFile, tableFile);
220  } else {
221  FitModelAndPlot(measurement.GetName(), measurement.GetOutputFilePrefix(), ws_single,
222  ch_name, "asimovData", outFile, tableFile);
223  }
224  }
225  }
226 
227  fprintf(tableFile, " & " );
228  } // End loop over channels
229 
230  /***
231  Second: Make the combined model:
232  If you want output histograms in root format, create and pass it to the combine routine.
233  "combine" : will do the individual cross-section measurements plus combination
234  ***/
235 
236  // Use HistFactory to combine the individual channel workspaces
237  ws = factory.MakeCombinedModel(channel_names, channel_workspaces);
238 
239  // Configure that workspace
240  HistoToWorkspaceFactoryFast::ConfigureWorkspaceForMeasurement( "simPdf", ws, measurement );
241 
242  // Get the Parameter of interest as a RooRealVar
243  RooRealVar* poi = dynamic_cast<RooRealVar*>( ws->var( (measurement.GetPOI()).c_str() ) );
244 
245  std::string CombinedFileName = measurement.GetOutputFilePrefix() + "_combined_"
246  + rowTitle + "_model.root";
247  std::cout << "Writing combined workspace to file: " << CombinedFileName << std::endl;
248  ws->writeToFile( CombinedFileName.c_str() );
249  std::cout << "Writing combined measurement to file: " << CombinedFileName << std::endl;
250  TFile* combFile = TFile::Open( CombinedFileName.c_str(), "UPDATE" );
251  if( combFile == NULL ) {
252  std::cout << "Error: Failed to open file " << CombinedFileName << std::endl;
253  throw hf_exc();
254  }
255  measurement.writeToFile( combFile );
256  combFile->Close();
257 
258  // Fit the combined model
259  if(! measurement.GetExportOnly()){
260  if(!poi) {
261  std::cout << "Can't do fit for: " << measurement.GetName()
262  << ", no parameter of interest" << std::endl;
263  }
264  else {
265  if(ws->data("obsData")){
266  FitModelAndPlot(measurement.GetName(), measurement.GetOutputFilePrefix(), ws,"combined",
267  "obsData", outFile, tableFile);
268  }
269  else {
270  FitModelAndPlot(measurement.GetName(), measurement.GetOutputFilePrefix(), ws,"combined",
271  "asimovData", outFile, tableFile);
272  }
273  }
274  }
275 
276  fprintf(tableFile, " \\\\ \n");
277 
278  outFile->Close();
279  delete outFile;
280 
281  fclose( tableFile );
282 
283  }
284  catch(...) {
285  if( tableFile ) fclose(tableFile);
286  if(outFile) outFile->Close();
287  throw;
288  }
289 
290  return ws;
291 
292 }
293 
294 
295 ///////////////////////////////////////////////
296 void RooStats::HistFactory::FitModelAndPlot(const std::string& MeasurementName,
297  const std::string& FileNamePrefix,
298  RooWorkspace * combined, std::string channel,
299  std::string data_name,
300  TFile* outFile, FILE* tableFile ) {
301 
302  if( outFile == NULL ) {
303  std::cout << "Error: Output File in FitModelAndPlot is NULL" << std::endl;
304  throw hf_exc();
305  }
306 
307  if( tableFile == NULL ) {
308  std::cout << "Error: tableFile in FitModelAndPlot is NULL" << std::endl;
309  throw hf_exc();
310  }
311 
312  if( combined == NULL ) {
313  std::cout << "Error: Supplied workspace in FitModelAndPlot is NULL" << std::endl;
314  throw hf_exc();
315  }
316 
317  ModelConfig* combined_config = (ModelConfig *) combined->obj("ModelConfig");
318  if(!combined_config){
319  std::cout << "Error: no ModelConfig found in Measurement: "
320  << MeasurementName << std::endl;
321  throw hf_exc();
322  }
323 
324  RooAbsData* simData = combined->data(data_name.c_str());
325  if(!simData){
326  std::cout << "Error: Failed to get dataset: " << data_name
327  << " in measurement: " << MeasurementName << std::endl;
328  throw hf_exc();
329  }
330 
331  const RooArgSet* POIs = combined_config->GetParametersOfInterest();
332  if(!POIs) {
333  std::cout << "Not Fitting Model for measurement: " << MeasurementName
334  << ", no poi found" << std::endl;
335  // Should I throw an exception here?
336  return;
337  }
338 
339  RooAbsPdf* model = combined_config->GetPdf();
340  if( model==NULL ) {
341  std::cout << "Error: Failed to find pdf in ModelConfig: " << combined_config->GetName()
342  << std::endl;
343  throw hf_exc();
344  }
345 
346  // Save a Snapshot
347  RooArgSet PoiPlusNuisance;
348  if( combined_config->GetNuisanceParameters() ) {
349  PoiPlusNuisance.add( *combined_config->GetNuisanceParameters() );
350  }
351  PoiPlusNuisance.add( *combined_config->GetParametersOfInterest() );
352  combined->saveSnapshot("InitialValues", PoiPlusNuisance);
353 
354  ///////////////////////////////////////
355  // Do the fit
356  std::cout << "\n\n---------------" << std::endl;
357  std::cout << "---------------- Doing "<< channel << " Fit" << std::endl;
358  std::cout << "---------------\n\n" << std::endl;
359  model->fitTo(*simData, Minos(kTRUE), PrintLevel(1));
360 
361  // If there are no parameters of interest,
362  // we exit the function here
363  if( POIs->getSize()==0 ) {
364  std::cout << "WARNING: No POIs found in measurement: " << MeasurementName << std::endl;
365  return;
366  }
367 
368  // Loop over all POIs and print their fitted values
369  RooRealVar* poi = NULL; // (RooRealVar*) POIs->first();
370  TIterator* params_itr = POIs->createIterator();
371  TObject* poi_obj=NULL;
372  while( (poi_obj=params_itr->Next()) ) {
373  //poi = (RooRealVar*) poi_obj;
374  poi = dynamic_cast<RooRealVar*>(poi_obj);
375  std::cout << "printing results for " << poi->GetName()
376  << " at " << poi->getVal()<< " high "
377  << poi->getErrorLo() << " low "
378  << poi->getErrorHi() << std::endl;
379  }
380 
381  // But we only make detailed plots and tables
382  // for the 'first' POI
383  poi = dynamic_cast<RooRealVar*>(POIs->first());
384 
385  // Print the MINOS errors to the TableFile
386  fprintf(tableFile, " %.4f / %.4f ", poi->getErrorLo(), poi->getErrorHi());
387 
388  // Make the Profile Likelihood Plot
389  RooAbsReal* nll = model->createNLL(*simData);
390  RooAbsReal* profile = nll->createProfile(*poi);
391  if( profile==NULL ) {
392  std::cout << "Error: Failed to make ProfileLikelihood for: " << poi->GetName()
393  << " using model: " << model->GetName()
394  << " and data: " << simData->GetName()
395  << std::endl;
396  throw hf_exc();
397  }
398 
399  RooPlot* frame = poi->frame();
400  if( frame == NULL ) {
401  std::cout << "Error: Failed to create RooPlot frame for: " << poi->GetName() << std::endl;
402  throw hf_exc();
403  }
404 
405  // Draw the likelihood curve
406  FormatFrameForLikelihood(frame);
407  TCanvas* ProfileLikelihoodCanvas = new TCanvas( channel.c_str(), "",800,600);
408  nll->plotOn(frame, ShiftToZero(), LineColor(kRed), LineStyle(kDashed));
409  profile->plotOn(frame);
410  frame->SetMinimum(0);
411  frame->SetMaximum(2.);
412  frame->Draw();
413  std::string ProfilePlotName = FileNamePrefix+"_"+channel+"_"+MeasurementName+"_profileLR.eps";
414  ProfileLikelihoodCanvas->SaveAs( ProfilePlotName.c_str() );
415  delete ProfileLikelihoodCanvas;
416 
417  // Now, we save our results to the 'output' file
418  // (I'm not sure if users actually look into this file,
419  // but adding additional information and useful plots
420  // may make it more attractive)
421 
422  // Save to the output file
423  TDirectory* channel_dir = outFile->mkdir(channel.c_str());
424  if( channel_dir == NULL ) {
425  std::cout << "Error: Failed to make channel directory: " << channel << std::endl;
426  throw hf_exc();
427  }
428  TDirectory* summary_dir = channel_dir->mkdir("Summary");
429  if( summary_dir == NULL ) {
430  std::cout << "Error: Failed to make Summary directory for channel: "
431  << channel << std::endl;
432  throw hf_exc();
433  }
434  summary_dir->cd();
435 
436  // Save a graph of the profile likelihood curve
437  RooCurve* curve=frame->getCurve();
438  Int_t curve_N=curve->GetN();
439  Double_t* curve_x=curve->GetX();
440 
441  Double_t * x_arr = new Double_t[curve_N];
442  Double_t * y_arr_nll = new Double_t[curve_N];
443 
444  for(int i=0; i<curve_N; i++){
445  double f=curve_x[i];
446  poi->setVal(f);
447  x_arr[i]=f;
448  y_arr_nll[i]=nll->getVal();
449  }
450 
451  delete frame;
452 
453  TGraph* g = new TGraph(curve_N, x_arr, y_arr_nll);
454  g->SetName( (FileNamePrefix +"_nll").c_str() );
455  g->Write();
456  delete g;
457  delete [] x_arr;
458  delete [] y_arr_nll;
459 
460  // Finally, restore the initial values
461  combined->loadSnapshot("InitialValues");
462 
463 }
464 
465 
466 void RooStats::HistFactory::FitModel(RooWorkspace * combined, std::string data_name ) {
467 
468  std::cout << "In Fit Model" << std::endl;
469  ModelConfig * combined_config = (ModelConfig *) combined->obj("ModelConfig");
470  if(!combined_config){
471  std::cout << "no model config " << "ModelConfig" << " exiting" << std::endl;
472  return;
473  }
474 
475  RooAbsData* simData = combined->data(data_name.c_str());
476  if(!simData){
477  std::cout << "no data " << data_name << " exiting" << std::endl;
478  return;
479  }
480 
481  const RooArgSet * POIs=combined_config->GetParametersOfInterest();
482  if(!POIs){
483  std::cout << "no poi " << data_name << " exiting" << std::endl;
484  return;
485  }
486 
487  RooAbsPdf* model=combined_config->GetPdf();
488  model->fitTo(*simData, Minos(kTRUE), PrintLevel(1));
489 
490  }
491 
492 
493 void RooStats::HistFactory::FormatFrameForLikelihood(RooPlot* frame, std::string /*XTitle*/,
494  std::string YTitle){
495 
496  gStyle->SetCanvasBorderMode(0);
497  gStyle->SetPadBorderMode(0);
498  // gStyle->SetPadColor(0);
499  // gStyle->SetCanvasColor(255);
500  // gStyle->SetTitleFillColor(255);
501  // gStyle->SetFrameFillColor(0);
502  // gStyle->SetStatColor(255);
503 
504  RooAbsRealLValue* var = frame->getPlotVar();
505  double xmin = var->getMin();
506  double xmax = var->getMax();
507 
508  frame->SetTitle("");
509  // frame->GetXaxis()->SetTitle(XTitle.c_str());
510  frame->GetXaxis()->SetTitle(var->GetTitle());
511  frame->GetYaxis()->SetTitle(YTitle.c_str());
512  frame->SetMaximum(2.);
513  frame->SetMinimum(0.);
514  TLine * line = new TLine(xmin,.5,xmax,.5);
515  line->SetLineColor(kGreen);
516  TLine * line90 = new TLine(xmin,2.71/2.,xmax,2.71/2.);
517  line90->SetLineColor(kGreen);
518  TLine * line95 = new TLine(xmin,3.84/2.,xmax,3.84/2.);
519  line95->SetLineColor(kGreen);
520  frame->addObject(line);
521  frame->addObject(line90);
522  frame->addObject(line95);
523 }
524 
525