Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
SPlot.cxx
Go to the documentation of this file.
1 // @(#)root/roostats:$Id$
2 // Author: Kyle Cranmer 28/07/2008
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2008, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /*****************************************************************************
13  * Project: RooStats
14  * Package: RooFit/RooStats
15  *
16  * Authors:
17  * Original code from M. Pivk as part of MLFit package from BaBar.
18  * Modifications:
19  * Giacinto Piacquadio, Maurizio Pierini: modifications for new RooFit version
20  * George H. Lewis, Kyle Cranmer: generalized for weighted events
21  *
22  * Porting to RooStats (with permission) by Kyle Cranmer, July 2008
23  *
24  *****************************************************************************/
25 
26 
27 /** \class RooStats::SPlot
28  \ingroup Roostats
29 
30  This class calculates sWeights used to create an sPlot.
31  The code is based on
32  ``SPlot: A statistical tool to unfold data distributions,''
33  Nucl. Instrum. Meth. A 555, 356 (2005) [arXiv:physics/0402083].
34 
35  An SPlot gives us the distribution of some variable, x in our
36  data sample for a given species (eg. signal or background).
37  The result is similar to a likelihood projection plot, but no cuts are made,
38  so every event contributes to the distribution.
39 
40  To use this class, you first must have a pdf that includes
41  yields for (possibly several) different species.
42  Create an instance of the class by supplying a data set,
43  the pdf, and a list of the yield variables. The SPlot Class
44  will calculate SWeights and include these as columns in the RooDataSet.
45 
46 */
47 
48 #include <vector>
49 #include <map>
50 
51 #include "RooStats/SPlot.h"
52 #include "RooAbsPdf.h"
53 #include "RooDataSet.h"
54 #include "RooRealVar.h"
55 #include "RooGlobalFunc.h"
56 #include "TTree.h"
57 #include "RooStats/RooStatsUtils.h"
58 
59 
60 #include "TMatrixD.h"
61 
62 
63 ClassImp(RooStats::SPlot); ;
64 
65 using namespace RooStats;
66 using namespace std;
67 
68 ////////////////////////////////////////////////////////////////////////////////
69 
70 SPlot::~SPlot()
71 {
72  if(TestBit(kOwnData) && fSData)
73  delete fSData;
74 
75 }
76 
77 ////////////////////////////////////////////////////////////////////////////////
78 /// Default constructor
79 
80 SPlot::SPlot():
81  TNamed()
82 {
83  RooArgList Args;
84 
85  fSWeightVars = Args;
86 
87  fSData = NULL;
88 
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 
93 SPlot::SPlot(const char* name, const char* title):
94  TNamed(name, title)
95 {
96  RooArgList Args;
97 
98  fSWeightVars = Args;
99 
100  fSData = NULL;
101 
102 }
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 ///Constructor from a RooDataSet
106 ///No sWeighted variables are present
107 
108 SPlot::SPlot(const char* name, const char* title, const RooDataSet &data):
109  TNamed(name, title)
110 {
111  RooArgList Args;
112 
113  fSWeightVars = Args;
114 
115  fSData = (RooDataSet*) &data;
116 }
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 /// Copy Constructor from another SPlot
120 
121 SPlot::SPlot(const SPlot &other):
122  TNamed(other)
123 {
124  RooArgList Args = (RooArgList) other.GetSWeightVars();
125 
126  fSWeightVars.addClone(Args);
127 
128  fSData = (RooDataSet*) other.GetSDataSet();
129 
130 }
131 
132 ////////////////////////////////////////////////////////////////////////////////
133 ///Construct a new SPlot instance, calculate sWeights, and include them
134 ///in the RooDataSet held by this instance.
135 ///
136 /// The constructor automatically calls AddSWeight() to add s weights to the dataset.
137 /// These can be retrieved later using GetSWeight() or GetSDataSet().
138 ///\param[in] name Name of the instance.
139 ///\param[in] title Title of the instance.
140 ///\param[in] data Dataset to fit to.
141 ///\param[in] pdf PDF to compute s weights for.
142 ///\param[in] yieldsList List of parameters in `pdf` that are yields.
143 ///\param[in] projDeps Don't normalise over these parameters when calculating the sWeights. Will be passed on to AddSWeight().
144 ///\param[in] includeWeights Whether or not to include the weights in `data`. Passed on to AddSWeight().
145 ///\param[in] cloneData Make a clone of the incoming data before adding weights.
146 ///\param[in] newName New name for the data.
147 ///\param[in] argX Additional arguments for the fitting step in AddSWeight().
148 SPlot::SPlot(const char* name, const char* title, RooDataSet& data, RooAbsPdf* pdf,
149  const RooArgList &yieldsList, const RooArgSet &projDeps,
150  bool includeWeights, bool cloneData, const char* newName,
151  const RooCmdArg& arg5, const RooCmdArg& arg6, const RooCmdArg& arg7, const RooCmdArg& arg8):
152  TNamed(name, title)
153 {
154  if(cloneData == 1) {
155  fSData = (RooDataSet*) data.Clone(newName);
156  SetBit(kOwnData);
157  }
158  else
159  fSData = (RooDataSet*) &data;
160 
161  // Add check that yieldsList contains all RooRealVars
162  for (const auto arg : yieldsList) {
163  if (!dynamic_cast<const RooRealVar*>(arg)) {
164  coutE(InputArguments) << "SPlot::SPlot(" << GetName() << ") input argument "
165  << arg->GetName() << " is not of type RooRealVar " << endl ;
166  throw string(Form("SPlot::SPlot(%s) input argument %s is not of type RooRealVar",GetName(),arg->GetName())) ;
167  }
168  }
169 
170  //Construct a new SPlot class,
171  //calculate sWeights, and include them
172  //in the RooDataSet of this class.
173 
174  this->AddSWeight(pdf, yieldsList, projDeps, includeWeights, arg5, arg6, arg7, arg8);
175 }
176 
177 ////////////////////////////////////////////////////////////////////////////////
178 /// Set dataset (if not passed in constructor).
179 RooDataSet* SPlot::SetSData(RooDataSet* data)
180 {
181  if(data) {
182  fSData = (RooDataSet*) data;
183  return fSData;
184  } else
185  return NULL;
186 }
187 
188 ////////////////////////////////////////////////////////////////////////////////
189 /// Retrieve s-weighted data.
190 /// It does **not** automatically call AddSWeight(). This needs to be done manually.
191 RooDataSet* SPlot::GetSDataSet() const
192 {
193  return fSData;
194 }
195 
196 ////////////////////////////////////////////////////////////////////////////////
197 /// Retrieve an s weight.
198 /// \param[in] numEvent Event number to retrieve s weight for.
199 /// \param[in] sVariable The yield parameter to retrieve the s weight for.
200 Double_t SPlot::GetSWeight(Int_t numEvent, const char* sVariable) const
201 {
202  if(numEvent > fSData->numEntries() )
203  {
204  coutE(InputArguments) << "Invalid Entry Number" << endl;
205  return -1;
206  }
207 
208  if(numEvent < 0)
209  {
210  coutE(InputArguments) << "Invalid Entry Number" << endl;
211  return -1;
212  }
213 
214  Double_t totalYield = 0;
215 
216  std::string varname(sVariable);
217  varname += "_sw";
218 
219 
220  if(fSWeightVars.find(sVariable) )
221  {
222  RooArgSet Row(*fSData->get(numEvent));
223  totalYield += Row.getRealValue(sVariable);
224 
225  return totalYield;
226  }
227 
228  if( fSWeightVars.find(varname.c_str()) )
229  {
230 
231  RooArgSet Row(*fSData->get(numEvent));
232  totalYield += Row.getRealValue(varname.c_str() );
233 
234  return totalYield;
235  }
236 
237  else
238  coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
239 
240  return -1;
241 }
242 
243 
244 ////////////////////////////////////////////////////////////////////////////////
245 /// Sum the SWeights for a particular event.
246 /// This sum should equal the total weight of that event.
247 /// This method is intended to be used as a check.
248 
249 Double_t SPlot::GetSumOfEventSWeight(Int_t numEvent) const
250 {
251  if(numEvent > fSData->numEntries() )
252  {
253  coutE(InputArguments) << "Invalid Entry Number" << endl;
254  return -1;
255  }
256 
257  if(numEvent < 0)
258  {
259  coutE(InputArguments) << "Invalid Entry Number" << endl;
260  return -1;
261  }
262 
263  Int_t numSWeightVars = this->GetNumSWeightVars();
264 
265  Double_t eventSWeight = 0;
266 
267  RooArgSet Row(*fSData->get(numEvent));
268 
269  for (Int_t i = 0; i < numSWeightVars; i++)
270  eventSWeight += Row.getRealValue(fSWeightVars.at(i)->GetName() );
271 
272  return eventSWeight;
273 }
274 
275 ////////////////////////////////////////////////////////////////////////////////
276 /// Sum the SWeights for a particular species over all events.
277 /// This should equal the total (weighted) yield of that species.
278 /// This method is intended as a check.
279 
280 Double_t SPlot::GetYieldFromSWeight(const char* sVariable) const
281 {
282 
283  Double_t totalYield = 0;
284 
285  std::string varname(sVariable);
286  varname += "_sw";
287 
288 
289  if(fSWeightVars.find(sVariable) )
290  {
291  for(Int_t i=0; i < fSData->numEntries(); i++)
292  {
293  RooArgSet Row(*fSData->get(i));
294  totalYield += Row.getRealValue(sVariable);
295  }
296 
297  return totalYield;
298  }
299 
300  if( fSWeightVars.find(varname.c_str()) )
301  {
302  for(Int_t i=0; i < fSData->numEntries(); i++)
303  {
304  RooArgSet Row(*fSData->get(i));
305  totalYield += Row.getRealValue(varname.c_str() );
306  }
307 
308  return totalYield;
309  }
310 
311  else
312  coutE(InputArguments) << "InputVariable not in list of sWeighted variables" << endl;
313 
314  return -1;
315 }
316 
317 
318 ////////////////////////////////////////////////////////////////////////////////
319 /// Return a RooArgList containing all paramters that have s weights.
320 
321 RooArgList SPlot::GetSWeightVars() const
322 {
323 
324  RooArgList Args = fSWeightVars;
325 
326  return Args;
327 
328 }
329 
330 ////////////////////////////////////////////////////////////////////////////////
331 /// Return the number of SWeights
332 /// In other words, return the number of
333 /// species that we are trying to extract.
334 
335 Int_t SPlot::GetNumSWeightVars() const
336 {
337  RooArgList Args = fSWeightVars;
338 
339  return Args.getSize();
340 }
341 
342 ////////////////////////////////////////////////////////////////////////////////
343 /// Method which adds the sWeights to the dataset.
344 ///
345 /// The SPlot will contain two new variables for each yield parameter:
346 /// - `L_<varname>` is the the likelihood for each event, *i.e.*, the pdf evaluated for the a given value of the variable "varname".
347 /// - `<varname>_sw` is the value of the sWeight for the variable "varname" for each event.
348 ///
349 /// Find Parameters in the PDF to be considered fixed when calculating the SWeights
350 /// and be sure to NOT include the yields in that list.
351 ///
352 /// After fixing non-yield parameters, this function will start a fit by calling
353 /// ```
354 /// pdf->fitTo(*fSData, RooFit::Extended(kTRUE), RooFit::SumW2Error(kTRUE), RooFit::PrintLevel(-1), RooFit::PrintEvalErrors(-1)).
355 /// ```
356 /// One can pass additional arguments to `fitTo`, such as `RooFit::Range("fitrange")`, as `arg5`, `arg6`, `arg7`, `arg8`.
357 ///
358 /// \note A `RooFit::Range` may be necessary to get expected results if you initially fit in a range
359 /// and/or called `pdf->fixCoefRange("fitrange")` on `pdf`.
360 /// Pass `arg5`, `arg6`, `arg7`, `arg8` AT YOUR OWN RISK.
361 ///
362 /// \param[in] pdf PDF to fit to data to compute s weights.
363 /// \param[in] yieldsTmp Yields to use to compute s weights.
364 /// \param[in] projDeps These will not be normalized over when calculating the sWeights,
365 /// and will be considered parameters, not observables.
366 /// \param[in] includeWeights
367 /// \param[in] argX Optional additional arguments for the fitting step.
368 void SPlot::AddSWeight( RooAbsPdf* pdf, const RooArgList &yieldsTmp,
369  const RooArgSet &projDeps, bool includeWeights,
370  const RooCmdArg& arg5, const RooCmdArg& arg6, const RooCmdArg& arg7, const RooCmdArg& arg8)
371 {
372 
373  RooFit::MsgLevel currentLevel = RooMsgService::instance().globalKillBelow();
374 
375  // Find Parameters in the PDF to be considered fixed when calculating the SWeights
376  // and be sure to NOT include the yields in that list
377  RooArgList* constParameters = (RooArgList*)pdf->getParameters(fSData) ;
378  constParameters->remove(yieldsTmp, kTRUE, kTRUE);
379 
380 
381  // Set these parameters constant and store them so they can later
382  // be set to not constant
383  std::vector<RooRealVar*> constVarHolder;
384 
385  for(Int_t i = 0; i < constParameters->getSize(); i++)
386  {
387  RooRealVar* varTemp = ( dynamic_cast<RooRealVar*>( constParameters->at(i) ) );
388  if(varTemp && varTemp->isConstant() == 0 )
389  {
390  varTemp->setConstant();
391  constVarHolder.push_back(varTemp);
392  }
393  }
394 
395  // Fit yields to the data with all other variables held constant
396  // This is necessary because SPlot assumes the yields minimise -Log(likelihood)
397 
398  pdf->fitTo(*fSData, RooFit::Extended(kTRUE), RooFit::SumW2Error(kTRUE), RooFit::PrintLevel(-1), RooFit::PrintEvalErrors(-1), arg5, arg6, arg7, arg8);
399 
400  // Hold the value of the fitted yields
401  std::vector<double> yieldsHolder;
402 
403  for(Int_t i = 0; i < yieldsTmp.getSize(); i++)
404  yieldsHolder.push_back( ((RooRealVar*) yieldsTmp.at(i))->getVal());
405 
406  Int_t nspec = yieldsTmp.getSize();
407  RooArgList yields = *(RooArgList*)yieldsTmp.snapshot(kFALSE);
408 
409  if(currentLevel <= RooFit::DEBUG)
410  {
411  coutI(InputArguments) << "Printing Yields" << endl;
412  yields.Print();
413  }
414 
415  // The list of variables to normalize over when calculating PDF values.
416 
417  RooArgSet vars(*fSData->get() );
418  vars.remove(projDeps, kTRUE, kTRUE);
419 
420  // Attach data set
421 
422  // const_cast<RooAbsPdf*>(pdf)->attachDataSet(*fSData);
423 
424  pdf->attachDataSet(*fSData);
425 
426  // first calculate the pdf values for all species and all events
427  std::vector<RooRealVar*> yieldvars ;
428  RooArgSet* parameters = pdf->getParameters(fSData) ;
429 
430  std::vector<Double_t> yieldvalues ;
431  for (Int_t k = 0; k < nspec; ++k)
432  {
433  RooRealVar* thisyield = dynamic_cast<RooRealVar*>(yields.at(k)) ;
434  if (thisyield) {
435  RooRealVar* yieldinpdf = dynamic_cast<RooRealVar*>(parameters->find(thisyield->GetName() )) ;
436 
437  if (yieldinpdf) {
438  coutI(InputArguments)<< "yield in pdf: " << yieldinpdf->GetName() << " " << thisyield->getVal() << endl;
439 
440  yieldvars.push_back(yieldinpdf) ;
441  yieldvalues.push_back(thisyield->getVal()) ;
442  }
443  }
444  }
445 
446  Int_t numevents = fSData->numEntries() ;
447 
448  std::vector<std::vector<Double_t> > pdfvalues(numevents,std::vector<Double_t>(nspec,0)) ;
449 
450 
451  // set all yield to zero
452  for(Int_t m=0; m<nspec; ++m) yieldvars[m]->setVal(0) ;
453 
454 
455  // For every event and for every specie,
456  // calculate the value of the component pdf for that specie
457  // by setting the yield of that specie to 1
458  // and all others to 0. Evaluate the pdf for each event
459  // and store the values.
460 
461  RooArgSet * pdfvars = pdf->getVariables();
462 
463  for (Int_t ievt = 0; ievt <numevents; ievt++)
464  {
465  // if (ievt % 100 == 0)
466  // coutP(Eval) << ".";
467 
468 
469  //FIX THIS PART, EVALUATION PROGRESS!!
470 
471  RooStats::SetParameters(fSData->get(ievt), pdfvars);
472 
473  // RooArgSet row(*fSData->get(ievt));
474 
475  for(Int_t k = 0; k < nspec; ++k)
476  {
477  //Check that range of yields is at least (0,1), and fix otherwise
478  if(yieldvars[k]->getMin() > 0)
479  {
480  coutW(InputArguments) << "Minimum Range for " << yieldvars[k]->GetName() << " must be 0. ";
481  coutW(InputArguments) << "Setting min range to 0" << std::endl;
482  yieldvars[k]->setMin(0);
483  }
484 
485  if(yieldvars[k]->getMax() < 1)
486  {
487  coutW(InputArguments) << "Maximum Range for " << yieldvars[k]->GetName() << " must be 1. ";
488  coutW(InputArguments) << "Setting max range to 1" << std::endl;
489  yieldvars[k]->setMax(1);
490  }
491 
492  // set this yield to 1
493  yieldvars[k]->setVal( 1 ) ;
494  // evaluate the pdf
495  Double_t f_k = pdf->getVal(&vars) ;
496  pdfvalues[ievt][k] = f_k ;
497  if( !(f_k>1 || f_k<1) )
498  coutW(InputArguments) << "Strange pdf value: " << ievt << " " << k << " " << f_k << std::endl ;
499  yieldvars[k]->setVal( 0 ) ;
500  }
501  }
502  delete pdfvars;
503 
504  // check that the likelihood normalization is fine
505  std::vector<Double_t> norm(nspec,0) ;
506  for (Int_t ievt = 0; ievt <numevents ; ievt++)
507  {
508  Double_t dnorm(0) ;
509  for(Int_t k=0; k<nspec; ++k) dnorm += yieldvalues[k] * pdfvalues[ievt][k] ;
510  for(Int_t j=0; j<nspec; ++j) norm[j] += pdfvalues[ievt][j]/dnorm ;
511  }
512 
513  coutI(Contents) << "likelihood norms: " ;
514 
515  for(Int_t k=0; k<nspec; ++k) coutI(Contents) << norm[k] << " " ;
516  coutI(Contents) << std::endl ;
517 
518  // Make a TMatrixD to hold the covariance matrix.
519  TMatrixD covInv(nspec, nspec);
520  for (Int_t i = 0; i < nspec; i++) for (Int_t j = 0; j < nspec; j++) covInv(i,j) = 0;
521 
522  coutI(Contents) << "Calculating covariance matrix";
523 
524 
525  // Calculate the inverse covariance matrix, using weights
526  for (Int_t ievt = 0; ievt < numevents; ++ievt)
527  {
528 
529  fSData->get(ievt) ;
530 
531  // Calculate contribution to the inverse of the covariance
532  // matrix. See BAD 509 V2 eqn. 15
533 
534  // Sum for the denominator
535  Double_t dsum(0);
536  for(Int_t k = 0; k < nspec; ++k)
537  dsum += pdfvalues[ievt][k] * yieldvalues[k] ;
538 
539  for(Int_t n=0; n<nspec; ++n)
540  for(Int_t j=0; j<nspec; ++j)
541  {
542  if(includeWeights == kTRUE)
543  covInv(n,j) += fSData->weight()*pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
544  else
545  covInv(n,j) += pdfvalues[ievt][n]*pdfvalues[ievt][j]/(dsum*dsum) ;
546  }
547 
548  //ADDED WEIGHT ABOVE
549 
550  }
551 
552  // Covariance inverse should now be computed!
553 
554  // Invert to get the covariance matrix
555  if (covInv.Determinant() <=0)
556  {
557  coutE(Eval) << "SPlot Error: covariance matrix is singular; I can't invert it!" << std::endl;
558  covInv.Print();
559  return;
560  }
561 
562  TMatrixD covMatrix(TMatrixD::kInverted,covInv);
563 
564  //check cov normalization
565  if(currentLevel <= RooFit::DEBUG)
566  {
567  coutI(Eval) << "Checking Likelihood normalization: " << std::endl;
568  coutI(Eval) << "Yield of specie Sum of Row in Matrix Norm" << std::endl;
569  for(Int_t k=0; k<nspec; ++k)
570  {
571  Double_t covnorm(0) ;
572  for(Int_t m=0; m<nspec; ++m) covnorm += covInv[k][m]*yieldvalues[m] ;
573  Double_t sumrow(0) ;
574  for(Int_t m = 0; m < nspec; ++m) sumrow += covMatrix[k][m] ;
575  coutI(Eval) << yieldvalues[k] << " " << sumrow << " " << covnorm << endl ;
576  }
577  }
578 
579  // calculate for each event the sWeight (BAD 509 V2 eq. 21)
580  coutI(Eval) << "Calculating sWeight" << std::endl;
581  std::vector<RooRealVar*> sweightvec ;
582  std::vector<RooRealVar*> pdfvec ;
583  RooArgSet sweightset ;
584 
585  // Create and label the variables
586  // used to store the SWeights
587 
588  fSWeightVars.Clear();
589 
590  for(Int_t k=0; k<nspec; ++k)
591  {
592  std::string wname = std::string(yieldvars[k]->GetName()) + "_sw";
593  RooRealVar* var = new RooRealVar(wname.c_str(),wname.c_str(),0) ;
594  sweightvec.push_back( var) ;
595  sweightset.add(*var) ;
596  fSWeightVars.add(*var);
597 
598  wname = "L_" + std::string(yieldvars[k]->GetName());
599  var = new RooRealVar(wname.c_str(),wname.c_str(),0) ;
600  pdfvec.push_back( var) ;
601  sweightset.add(*var) ;
602  }
603 
604  // Create and fill a RooDataSet
605  // with the SWeights
606 
607  RooDataSet* sWeightData = new RooDataSet("dataset", "dataset with sWeights", sweightset);
608 
609  for(Int_t ievt = 0; ievt < numevents; ++ievt)
610  {
611 
612  fSData->get(ievt) ;
613 
614  // sum for denominator
615  Double_t dsum(0);
616  for(Int_t k = 0; k < nspec; ++k) dsum += pdfvalues[ievt][k] * yieldvalues[k] ;
617  // covariance weighted pdf for each specief
618  for(Int_t n=0; n<nspec; ++n)
619  {
620  Double_t nsum(0) ;
621  for(Int_t j=0; j<nspec; ++j) nsum += covMatrix(n,j) * pdfvalues[ievt][j] ;
622 
623 
624  //Add the sWeights here!!
625  //Include weights,
626  //ie events weights are absorbed into sWeight
627 
628 
629  if(includeWeights == kTRUE) sweightvec[n]->setVal(fSData->weight() * nsum/dsum) ;
630  else sweightvec[n]->setVal( nsum/dsum) ;
631 
632  pdfvec[n]->setVal( pdfvalues[ievt][n] ) ;
633 
634  if( !(fabs(nsum/dsum)>=0 ) )
635  {
636  coutE(Contents) << "error: " << nsum/dsum << endl ;
637  return;
638  }
639  }
640 
641  sWeightData->add(sweightset) ;
642  }
643 
644 
645  // Add the SWeights to the original data set
646 
647  fSData->merge(sWeightData);
648 
649  delete sWeightData;
650 
651  //Restore yield values
652 
653  for(Int_t i = 0; i < yieldsTmp.getSize(); i++)
654  ((RooRealVar*) yieldsTmp.at(i))->setVal(yieldsHolder.at(i));
655 
656  //Make any variables that were forced to constant no longer constant
657 
658  for(Int_t i=0; i < (Int_t) constVarHolder.size(); i++)
659  constVarHolder.at(i)->setConstant(kFALSE);
660 
661  return;
662 
663 }