Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MakeModelAndMeasurements.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id: cranmer $
2 // Author: Kyle Cranmer, Akira Shibata
3 /*************************************************************************
4  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 ////////////////////////////////////////////////////////////////////////////////
12 
13 /**
14 \ingroup HistFactory
15 
16 BEGIN_HTML
17 <p>
18 This is a package that creates a RooFit probability density function from ROOT histograms
19 of expected distributions and histograms that represent the +/- 1 sigma variations
20 from systematic effects. The resulting probability density function can then be used
21 with any of the statistical tools provided within RooStats, such as the profile
22 likelihood ratio, Feldman-Cousins, etc. In this version, the model is directly
23 fed to a likelihodo ratio test, but it needs to be further factorized.</p>
24 
25 <p>
26 The user needs to provide histograms (in picobarns per bin) and configure the job
27 with XML. The configuration XML is defined in the file config/Config.dtd, but essentially
28 it is organized as follows (see config/Combination.xml and config/ee.xml for examples)</p>
29 
30 <ul>
31 <li> - a top level 'Combination' that is composed of:</li>
32 <ul>
33  <li>- several 'Channels' (eg. ee, emu, mumu), which are composed of:</li>
34  <ul>
35  <li>- several 'Samples' (eg. signal, bkg1, bkg2, ...), each of which has:</li>
36  <ul>
37  <li> - a name</li>
38  <li> - if the sample is normalized by theory (eg N = L*sigma) or not (eg. data driven)</li>
39  <li> - a nominal expectation histogram</li>
40  <li> - a named 'Normalization Factor' (which can be fixed or allowed to float in a fit)</li>
41  <li> - several 'Overall Systematics' in normalization with:</li>
42  <ul>
43  <li> - a name</li>
44  <li> - +/- 1 sigma variations (eg. 1.05 and 0.95 for a 5% uncertainty)</li>
45  </ul>
46  <li>- several 'Histogram Systematics' in shape with:</li>
47  <ul>
48  <li>- a name (which can be shared with the OverallSyst if correlated)</li>
49  <li>- +/- 1 sigma variational histograms</li>
50  </ul>
51  </ul>
52  </ul>
53  <li>- several 'Measurements' (corresponding to a full fit of the model) each of which specifies</li>
54  <ul>
55  <li>- a name for this fit to be used in tables and files</li>
56  <ul>
57  <li> - what is the luminosity associated to the measurement in picobarns</li>
58  <li> - which bins of the histogram should be used</li>
59  <li> - what is the relative uncertainty on the luminosity </li>
60  <li> - what is (are) the parameter(s) of interest that will be measured</li>
61  <li> - which parameters should be fixed/floating (eg. nuisance parameters)</li>
62  </ul>
63  </ul>
64 </ul>
65 END_HTML
66 */
67 //
68 
69 
70 // from std
71 #include <string>
72 #include <vector>
73 #include <map>
74 #include <iostream>
75 #include <sstream>
76 
77 // from root
78 #include "TFile.h"
79 #include "TH1F.h"
80 #include "TDOMParser.h"
81 #include "TXMLAttr.h"
82 #include "TString.h"
83 
84 // from roofit
85 #include "RooStats/ModelConfig.h"
86 
87 // from this package
88 #include "Helper.h"
93 
94 
95 using namespace RooFit;
96 using namespace RooStats;
97 using namespace HistFactory;
98 
99 using namespace std;
100 
101 void topDriver(string input);
102 // void fastDriver(string input); // in MakeModelAndMeasurementsFast
103 
104 /*
105 //_____________________________batch only_____________________
106 #ifndef __CINT__
107 
108 int main(int argc, char** argv) {
109 
110  if(! (argc>1)) {
111  cerr << "need input file" << endl;
112  exit(1);
113  }
114 
115  if(argc==2){
116  string input(argv[1]);
117  try {
118  fastDriver(input);
119  }
120  catch (std::string str) {
121  cerr << "caught exception: " << str << endl ;
122  }
123  catch( const exception& e ) {
124  cerr << "Caught Exception: " << e.what() << endl;
125  }
126  }
127 
128  if(argc==3){
129  string flag(argv[1]);
130  string input(argv[2]);
131  if(flag=="-standard_form")
132  try {
133  fastDriver(input);
134  }
135  catch (std::string str) {
136  cerr << "caught exception: " << str << endl ;
137  }
138  catch( const exception& e ) {
139  cerr << "Caught Exception: " << e.what() << endl;
140  }
141  else if(flag=="-number_counting_form")
142  try {
143  topDriver(input);
144  }
145  catch (std::string str) {
146  cerr << "caught exception: " << str << endl ;
147  }
148  catch( const exception& e ) {
149  cerr << "Caught Exception: " << e.what() << endl;
150  }
151 
152  else
153  cerr <<"unrecognized flag. Options are -standard_form or -number_counting_form"<<endl;
154 
155  }
156  return 0;
157 }
158 
159 #endif
160 */
161 
162 void topDriver( string input ) {
163 
164 
165  // Make the list of measurements and channels
166  std::vector< HistFactory::Measurement > measurement_list;
167  //std::vector< HistFactory::Channel > channel_list;
168 
169 
170  HistFactory::ConfigParser xmlParser;
171 
172  // Fill them using the XML parser
173  //xmlParser.FillMeasurementsAndChannelsFromXML( input, measurement_list, channel_list );
174 
175  measurement_list = xmlParser.GetMeasurementsFromXML( input );
176 
177  // At this point, we have all the information we need
178  // from the xml files.
179 
180 
181  // We will make the measurements 1-by-1
182  // This part will be migrated to the
183  // MakeModelAndMeasurements function,
184  // but is here for now.
185 
186 
187  for(unsigned int i = 0; i < measurement_list.size(); ++i) {
188 
189  HistFactory::Measurement measurement = measurement_list.at(i);
190 
191  // Add the channels to this measurement
192  //for( unsigned int chanItr = 0; chanItr < channel_list.size(); ++chanItr ) {
193  // measurement.channels.push_back( channel_list.at( chanItr ) );
194  //}
195 
196  // This part (OF COURSE) needs to be added:
197 
198 
199  std::string rowTitle = measurement.GetName();
200  std::string outputFileName = measurement.GetOutputFilePrefix() + "_" + measurement.GetName() + ".root";
201 
202  double lumiError = measurement.GetLumi()*measurement.GetLumiRelErr();
203 
204  TFile* outFile = new TFile(outputFileName.c_str(), "recreate");
205  HistoToWorkspaceFactory factory(measurement.GetOutputFilePrefix(), rowTitle, measurement.GetConstantParams(),
206  measurement.GetLumi(), lumiError,
207  measurement.GetBinLow(), measurement.GetBinHigh(), outFile);
208 
209  // Create the workspaces for the channels
210  vector<RooWorkspace*> channel_workspaces;
211  vector<string> channel_names;
212 
213 
214  // Loop over channels and make the individual
215  // channel fits:
216 
217 
218  // read the xml for each channel and combine
219 
220  for( unsigned int chanItr = 0; chanItr < measurement.GetChannels().size(); ++chanItr ) {
221 
222  HistFactory::Channel& channel = measurement.GetChannels().at( chanItr );
223 
224 
225  string ch_name=channel.GetName();
226  channel_names.push_back(ch_name);
227 
228  std::vector< EstimateSummary > dummy;
229  RooWorkspace* ws = factory.MakeSingleChannelModel( dummy, measurement.GetConstantParams() );
230  if( ws==NULL ) {
231  std::cout << "Failed to create SingleChannelModel for channel: " << channel.GetName()
232  << " and measurement: " << measurement.GetName() << std::endl;
233  throw hf_exc();
234  }
235  //RooWorkspace* ws = factory.MakeSingleChannelModel( channel );
236  channel_workspaces.push_back(ws);
237 
238  // set poi in ModelConfig
239  ModelConfig* proto_config = (ModelConfig *) ws->obj("ModelConfig");
240 
241  std::cout << "Setting Parameter of Interest as :" << measurement.GetPOI() << endl;
242  RooRealVar* poi = (RooRealVar*) ws->var( (measurement.GetPOI()).c_str() );
243  RooArgSet * params= new RooArgSet;
244  if(poi){
245  params->add(*poi);
246  }
247  proto_config->SetParametersOfInterest(*params);
248 
249 
250  // Gamma/Uniform Constraints:
251  // turn some Gaussian constraints into Gamma/Uniform/LogNorm constraints, rename model newSimPdf
252  if( measurement.GetGammaSyst().size()>0 || measurement.GetUniformSyst().size()>0 || measurement.GetLogNormSyst().size()>0) {
253  factory.EditSyst( ws, ("model_"+ch_name).c_str(), measurement.GetGammaSyst(), measurement.GetUniformSyst(), measurement.GetLogNormSyst());
254  proto_config->SetPdf( *ws->pdf("newSimPdf") );
255  }
256 
257  // fill out ModelConfig and export
258  RooAbsData* expData = ws->data("expData");
259  if(poi){
260  proto_config->GuessObsAndNuisance(*expData);
261  }
262  std::string ChannelFileName = measurement.GetOutputFilePrefix() + "_" + ch_name + "_" + rowTitle + "_model.root";
263  ws->writeToFile( ChannelFileName.c_str() );
264 
265  // Now, write the measurement to the file
266  // Make a new measurement for only this channel
267  RooStats::HistFactory::Measurement meas_chan( measurement );
268  meas_chan.GetChannels().clear();
269  meas_chan.GetChannels().push_back( channel );
270  TFile* chanFile = TFile::Open( ChannelFileName.c_str(), "UPDATE" );
271  meas_chan.writeToFile( chanFile );
272  chanFile->Close();
273 
274  // do fit unless exportOnly requested
275  if(! measurement.GetExportOnly() ){
276  if(!poi){
277  cout <<"can't do fit for this channel, no parameter of interest"<<endl;
278  } else{
279  factory.FitModel(ws, ch_name, "newSimPdf", "expData", false);
280  }
281  }
282  fprintf(factory.pFile, " & " );
283  }
284 
285 
286  // Now, combine the channels
287  RooWorkspace* ws=factory.MakeCombinedModel(channel_names, channel_workspaces);
288  if( ws == NULL ) {
289  std::cout << "Error: Failed to create workspace" << std::endl;
290  throw hf_exc();
291  }
292  // Gamma/Uniform Constraints:
293  // turn some Gaussian constraints into Gamma/Uniform/logNormal constraints, rename model newSimPdf
294  if( measurement.GetGammaSyst().size()>0 || measurement.GetUniformSyst().size()>0 || measurement.GetLogNormSyst().size()>0)
295  factory.EditSyst(ws, "simPdf", measurement.GetGammaSyst(), measurement.GetUniformSyst(), measurement.GetLogNormSyst());
296  //
297  // set parameter of interest according to the configuration
298  //
299  ModelConfig * combined_config = (ModelConfig *) ws->obj("ModelConfig");
300  cout << "Setting Parameter of Interest as :" << measurement.GetPOI() << endl;
301  RooRealVar* poi = (RooRealVar*) ws->var( (measurement.GetPOI()).c_str() );
302  //RooRealVar* poi = (RooRealVar*) ws->var((POI+"_comb").c_str());
303  RooArgSet * params= new RooArgSet;
304  cout << poi << endl;
305  if(poi){
306  params->add(*poi);
307  }
308  combined_config->SetParametersOfInterest(*params);
309  ws->Print();
310 
311  // Set new PDF if there are gamma/uniform constraint terms
312  if( measurement.GetGammaSyst().size()>0 || measurement.GetUniformSyst().size()>0 || measurement.GetLogNormSyst().size()>0)
313  combined_config->SetPdf(*ws->pdf("newSimPdf"));
314 
315  RooAbsData* simData = ws->data("simData");
316  combined_config->GuessObsAndNuisance(*simData);
317  // ws->writeToFile(("results/model_combined_edited.root").c_str());
318  std::string CombinedFileName = measurement.GetOutputFilePrefix()+"_combined_"+rowTitle+"_model.root";
319  ws->writeToFile( CombinedFileName.c_str() );
320  TFile* combFile = TFile::Open( CombinedFileName.c_str(), "UPDATE" );
321  measurement.writeToFile( combFile );
322  combFile->Close();
323 
324 
325 
326  // TO DO:
327  // Totally factorize the statistical test in "fit Model" to a different area
328  if(! measurement.GetExportOnly() ){
329  if(!poi){
330  cout <<"can't do fit for this channel, no parameter of interest"<<endl;
331  } else{
332  factory.FitModel(ws, "combined", "simPdf", "simData", false);
333  }
334  }
335 
336 
337  } // End Loop over measurement_list
338 
339  // Done
340 
341 }
342 
343 /*
344 
345 void topDriver(string input ){
346 
347  // TO DO:
348  // would like to fully factorize the XML parsing.
349  // No clear need to have some here and some in ConfigParser
350 
351  / *** read in the input xml *** /
352  TDOMParser xmlparser;
353  Int_t parseError = xmlparser.ParseFile( input.c_str() );
354  if( parseError ) {
355  std::cerr << "Loading of xml document \"" << input
356  << "\" failed" << std::endl;
357  }
358 
359  cout << "reading input : " << input << endl;
360  TXMLDocument* xmldoc = xmlparser.GetXMLDocument();
361  TXMLNode* rootNode = xmldoc->GetRootNode();
362 
363  if( rootNode->GetNodeName() == TString( "Combination" ) ){
364  string outputFileName, outputFileNamePrefix;
365  vector<string> xml_input;
366 
367  TListIter attribIt = rootNode->GetAttributes();
368  TXMLAttr* curAttr = 0;
369  while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
370  if( curAttr->GetName() == TString( "OutputFilePrefix" ) ) {
371  outputFileNamePrefix=string(curAttr->GetValue());
372  cout << "output file is : " << outputFileName << endl;
373  }
374  }
375 
376  TXMLNode* node = rootNode->GetChildren();
377  while( node != 0 ) {
378  if( node->GetNodeName() == TString( "Input" ) ) {
379  xml_input.push_back(node->GetText());
380  }
381  node = node->GetNextNode();
382  }
383  node = rootNode->GetChildren();
384  while( node != 0 ) {
385  if( node->GetNodeName() == TString( "Measurement" ) ) {
386 
387  Double_t nominalLumi=0, lumiRelError=0, lumiError=0;
388  Int_t lowBin=0, highBin=0;
389  string rowTitle, POI, mode;
390  vector<string> systToFix;
391  map<string,double> gammaSyst;
392  map<string,double> uniformSyst;
393  map<string,double> logNormSyst;
394  bool exportOnly = false;
395 
396  // TListIter attribIt = node->GetAttributes();
397  // TXMLAttr* curAttr = 0;
398  attribIt = node->GetAttributes();
399  curAttr = 0;
400  while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
401 
402  if( curAttr->GetName() == TString( "Lumi" ) ) {
403  nominalLumi=atof(curAttr->GetValue());
404  }
405  if( curAttr->GetName() == TString( "LumiRelErr" ) ) {
406  lumiRelError=atof(curAttr->GetValue());
407  }
408  if( curAttr->GetName() == TString( "BinLow" ) ) {
409  lowBin=atoi(curAttr->GetValue());
410  }
411  if( curAttr->GetName() == TString( "BinHigh" ) ) {
412  highBin=atoi(curAttr->GetValue());
413  }
414  if( curAttr->GetName() == TString( "Name" ) ) {
415  rowTitle=curAttr->GetValue();
416  outputFileName=outputFileNamePrefix+"_"+rowTitle+".root";
417  }
418  if( curAttr->GetName() == TString( "Mode" ) ) {
419  cout <<"\n INFO: Mode attribute is deprecated, will ignore\n"<<endl;
420  mode=curAttr->GetValue();
421  }
422  if( curAttr->GetName() == TString( "ExportOnly" ) ) {
423  if(curAttr->GetValue() == TString( "True" ) )
424  exportOnly = true;
425  else
426  exportOnly = false;
427  }
428  }
429 
430  if(highBin==0){
431  cout <<"\nERROR: In -number_counting_form must specify BinLow and BinHigh\n"<<endl;
432  return;
433  }
434 
435  lumiError=nominalLumi*lumiRelError;
436 
437  TXMLNode* mnode = node->GetChildren();
438  while( mnode != 0 ) {
439  if( mnode->GetNodeName() == TString( "POI" ) ) {
440  POI=mnode->GetText();
441  }
442  if( mnode->GetNodeName() == TString( "ParamSetting" ) ) {
443  // TListIter attribIt = mnode->GetAttributes();
444  //TXMLAttr* curAttr = 0;
445  attribIt = mnode->GetAttributes();
446  curAttr = 0;
447  while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
448  if( curAttr->GetName() == TString( "Const" ) ) {
449  if(curAttr->GetValue()==TString("True")){
450  AddSubStrings(systToFix, mnode->GetText());
451  }
452  }
453  }
454  }
455  if( mnode->GetNodeName() == TString( "ConstraintTerm" ) ) {
456  vector<string> syst; string type = ""; double rel = 0;
457  AddSubStrings(syst,mnode->GetText());
458  // TListIter attribIt = mnode->GetAttributes();
459  // TXMLAttr* curAttr = 0;
460  attribIt = mnode->GetAttributes();
461  curAttr = 0;
462  while( ( curAttr = dynamic_cast< TXMLAttr* >( attribIt() ) ) != 0 ) {
463  if( curAttr->GetName() == TString( "Type" ) ) {
464  type = curAttr->GetValue();
465  }
466  if( curAttr->GetName() == TString( "RelativeUncertainty" ) ) {
467  rel = atof(curAttr->GetValue());
468  }
469  }
470  if (type=="Gamma" && rel!=0) {
471  for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) gammaSyst[(*it).c_str()] = rel;
472  }
473  if (type=="Uniform" && rel!=0) {
474  for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) uniformSyst[(*it).c_str()] = rel;
475  }
476  if (type=="LogNormal" && rel!=0) {
477  for (vector<string>::const_iterator it=syst.begin(); it!=syst.end(); it++) logNormSyst[(*it).c_str()] = rel;
478  }
479  }
480  mnode = mnode->GetNextNode();
481  }
482 
483  / * Do measurement * /
484  cout << "using lumi = " << nominalLumi << " and lumiError = " << lumiError
485  << " including bins between " << lowBin << " and " << highBin << endl;
486  cout << "fixing the following parameters:" << endl;
487  for(vector<string>::iterator itr=systToFix.begin(); itr!=systToFix.end(); ++itr){
488  cout << " " << *itr << endl;
489  }
490 
491  / ***
492  Construction of Model. Only requirement is that they return vector<vector<EstimateSummary> >
493  This is where we use the factory.
494  *** /
495 
496  vector<vector<EstimateSummary> > summaries;
497  if(xml_input.empty()){
498  cerr << "no input channels found" << endl;
499  exit(1);
500  }
501 
502 
503  vector<RooWorkspace*> chs;
504  vector<string> ch_names;
505  TFile* outFile = new TFile(outputFileName.c_str(), "recreate");
506  HistoToWorkspaceFactory factory(outputFileNamePrefix, rowTitle, systToFix, nominalLumi, lumiError, lowBin, highBin , outFile);
507 
508 
509  // for results tables
510  fprintf(factory.pFile, " %s &", rowTitle.c_str() );
511 
512  // read the xml for each channel and combine
513  for(vector<string>::iterator itr=xml_input.begin(); itr!=xml_input.end(); ++itr){
514  vector<EstimateSummary> oneChannel;
515  // read xml
516  ReadXmlConfig(*itr, oneChannel, nominalLumi);
517  // not really needed anymore
518  summaries.push_back(oneChannel);
519  // use factory to create the workspace
520  string ch_name=oneChannel[0].channel;
521  ch_names.push_back(ch_name);
522  RooWorkspace * ws = factory.MakeSingleChannelModel(oneChannel, systToFix);
523  chs.push_back(ws);
524  // set poi in ModelConfig
525  ModelConfig * proto_config = (ModelConfig *) ws->obj("ModelConfig");
526  cout << "Setting Parameter of Interest as :" << POI << endl;
527  RooRealVar* poi = (RooRealVar*) ws->var(POI.c_str());
528  RooArgSet * params= new RooArgSet;
529  if(poi){
530  params->add(*poi);
531  }
532  proto_config->SetParametersOfInterest(*params);
533 
534 
535  // Gamma/Uniform Constraints:
536  // turn some Gaussian constraints into Gamma/Uniform/LogNorm constraints, rename model newSimPdf
537  if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0) {
538  factory.EditSyst(ws,("model_"+oneChannel[0].channel).c_str(),gammaSyst,uniformSyst,logNormSyst);
539  proto_config->SetPdf(*ws->pdf("newSimPdf"));
540  }
541 
542  // fill out ModelConfig and export
543  RooAbsData* expData = ws->data("expData");
544  if(poi){
545  proto_config->GuessObsAndNuisance(*expData);
546  }
547  ws->writeToFile((outputFileNamePrefix+"_"+ch_name+"_"+rowTitle+"_model.root").c_str());
548 
549  // do fit unless exportOnly requested
550  if(!exportOnly){
551  if(!poi){
552  cout <<"can't do fit for this channel, no parameter of interest"<<endl;
553  } else{
554  factory.FitModel(ws, ch_name, "newSimPdf", "expData", false);
555  }
556  }
557  fprintf(factory.pFile, " & " );
558  }
559 
560  / ***
561  Make the combined model:
562  If you want output histograms in root format, create and pass it to the combine routine.
563  "combine" : will do the individual cross-section measurements plus combination
564 
565  *** /
566 
567 
568 
569  if(true || mode.find("comb")!=string::npos){ //KC: deprecating Mode="Comb"
570  RooWorkspace* ws=factory.MakeCombinedModel(ch_names,chs);
571  // Gamma/Uniform Constraints:
572  // turn some Gaussian constraints into Gamma/Uniform/logNormal constraints, rename model newSimPdf
573  if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0)
574  factory.EditSyst(ws,"simPdf",gammaSyst,uniformSyst,logNormSyst);
575  //
576  // set parameter of interest according to the configuration
577  //
578  ModelConfig * combined_config = (ModelConfig *) ws->obj("ModelConfig");
579  cout << "Setting Parameter of Interest as :" << POI << endl;
580  RooRealVar* poi = (RooRealVar*) ws->var((POI).c_str());
581  //RooRealVar* poi = (RooRealVar*) ws->var((POI+"_comb").c_str());
582  RooArgSet * params= new RooArgSet;
583  cout << poi << endl;
584  if(poi){
585  params->add(*poi);
586  }
587  combined_config->SetParametersOfInterest(*params);
588  ws->Print();
589 
590  // Set new PDF if there are gamma/uniform constraint terms
591  if(gammaSyst.size()>0 || uniformSyst.size()>0 || logNormSyst.size()>0)
592  combined_config->SetPdf(*ws->pdf("newSimPdf"));
593 
594  RooAbsData* simData = ws->data("simData");
595  combined_config->GuessObsAndNuisance(*simData);
596  // ws->writeToFile(("results/model_combined_edited.root").c_str());
597  ws->writeToFile((outputFileNamePrefix+"_combined_"+rowTitle+"_model.root").c_str());
598 
599  // TO DO:
600  // Totally factorize the statistical test in "fit Model" to a different area
601  if(!exportOnly){
602  if(!poi){
603  cout <<"can't do fit for this channel, no parameter of interest"<<endl;
604  } else{
605  factory.FitModel(ws, "combined", "simPdf", "simData", false);
606  }
607  }
608 
609  }
610 
611 
612  fprintf(factory.pFile, " \\\\ \n");
613 
614  outFile->Close();
615  delete outFile;
616 
617  }
618  node = node->GetNextNode(); // next measurement
619  }
620  }
621 }
622 
623 
624 
625 */