Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TBinomialEfficiencyFitter.cxx
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Author: Frank Filthaut, Rene Brun 30/05/2007
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2007, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TBinomialEfficiencyFitter
13  \ingroup Hist
14  \brief Binomial fitter for the division of two histograms.
15 
16 Use when you need to calculate a selection's efficiency from two histograms,
17 one containing all entries, and one containing the subset of these entries
18 that pass the selection, and when you have a parametrization available for
19 the efficiency as a function of the variable(s) under consideration.
20 
21 A very common problem when estimating efficiencies is that of error estimation:
22 when no other information is available than the total number of events N and
23 the selected number n, the best estimate for the selection efficiency p is n/N.
24 Standard binomial statistics dictates that the uncertainty (this presupposes
25 sufficiently high statistics that an approximation by a normal distribution is
26 reasonable) on p, given N, is
27 \f[
28  \sqrt{\frac{p(1-p)}{N}}
29 \f]
30 However, when p is estimated as n/N, fluctuations from the true p to its
31 estimate become important, especially for low numbers of events, and giving
32 rise to biased results.
33 
34 When fitting a parametrized efficiency, these problems can largely be overcome,
35 as a hypothesized true efficiency is available by construction. Even so, simply
36 using the corresponding uncertainty still presupposes that Gaussian errors
37 yields a reasonable approximation. When using, instead of binned efficiency
38 histograms, the original numerator and denominator histograms, a binned maximum
39 likelihood can be constructed as the product of bin-by-bin binomial probabilities
40 to select n out of N events. Assuming that a correct parametrization of the
41 efficiency is provided, this construction in general yields less biased results
42 (and is much less sensitive to binning details).
43 
44 A generic use of this method is given below (note that the method works for 2D
45 and 3D histograms as well):
46 
47 ~~~ {.cpp}
48  {
49  TH1* denominator; // denominator histogram
50  TH1* numerator; // corresponding numerator histogram
51  TF1* eff; // efficiency parametrization
52  .... // set step sizes and initial parameter
53  .... // values for the fit function
54  .... // possibly also set ranges, see TF1::SetRange()
55  TBinomialEfficiencyFitter* f = new TBinomialEfficiencyFitter(
56  numerator, denominator);
57  Int_t status = f->Fit(eff, "I");
58  if (status == 0) {
59  // if the fit was successful, display bin-by-bin efficiencies
60  // as well as the result of the fit
61  numerator->Sumw2();
62  TH1* hEff = dynamic_cast<TH1*>(numerator->Clone("heff"));
63  hEff->Divide(hEff, denominator, 1.0, 1.0, "B");
64  hEff->Draw("E");
65  eff->Draw("same");
66  }
67  }
68 ~~~
69 
70 Note that this method cannot be expected to yield reliable results when using
71 weighted histograms (because the likelihood computation will be incorrect).
72 
73 */
74 
76 
77 #include "TMath.h"
78 #include "TPluginManager.h"
79 #include "TROOT.h"
80 #include "TH1.h"
81 #include "TF1.h"
82 #include "TF2.h"
83 #include "TF3.h"
84 #include "Fit/FitConfig.h"
85 #include "Fit/Fitter.h"
86 #include "TFitResult.h"
87 #include "Math/Functor.h"
88 #include "Math/Functor.h"
89 #include "Math/WrappedMultiTF1.h"
90 
91 #include <limits>
92 
93 
94 const Double_t kDefaultEpsilon = 1E-12;
95 
96 ClassImp(TBinomialEfficiencyFitter);
97 
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// default constructor
101 
102 TBinomialEfficiencyFitter::TBinomialEfficiencyFitter() {
103  fNumerator = 0;
104  fDenominator = 0;
105  fFunction = 0;
106  fFitDone = kFALSE;
107  fAverage = kFALSE;
108  fRange = kFALSE;
109  fEpsilon = kDefaultEpsilon;
110  fFitter = 0;
111 }
112 
113 ////////////////////////////////////////////////////////////////////////////////
114 /// Constructor.
115 ///
116 /// Note that no objects are copied, so it is up to the user to ensure that the
117 /// histogram pointers remain valid.
118 ///
119 /// Both histograms need to be "consistent". This is not checked here, but in
120 /// TBinomialEfficiencyFitter::Fit().
121 
122 TBinomialEfficiencyFitter::TBinomialEfficiencyFitter(const TH1 *numerator, const TH1 *denominator) {
123  fEpsilon = kDefaultEpsilon;
124  fFunction = 0;
125  fFitter = 0;
126  Set(numerator,denominator);
127 }
128 
129 ////////////////////////////////////////////////////////////////////////////////
130 /// destructor
131 
132 TBinomialEfficiencyFitter::~TBinomialEfficiencyFitter() {
133  if (fFitter) delete fFitter;
134  fFitter = 0;
135 }
136 
137 ////////////////////////////////////////////////////////////////////////////////
138 /// Initialize with a new set of inputs.
139 
140 void TBinomialEfficiencyFitter::Set(const TH1 *numerator, const TH1 *denominator)
141 {
142  fNumerator = (TH1*)numerator;
143  fDenominator = (TH1*)denominator;
144 
145  fFitDone = kFALSE;
146  fAverage = kFALSE;
147  fRange = kFALSE;
148 }
149 
150 ////////////////////////////////////////////////////////////////////////////////
151 /// Set the required integration precision, see TF1::Integral()
152 
153 void TBinomialEfficiencyFitter::SetPrecision(Double_t epsilon)
154 {
155  fEpsilon = epsilon;
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 /// Provide access to the underlying fitter object.
160 /// This may be useful e.g. for the retrieval of additional information (such
161 /// as the output covariance matrix of the fit).
162 
163 ROOT::Fit::Fitter* TBinomialEfficiencyFitter::GetFitter()
164 {
165  if (!fFitter) fFitter = new ROOT::Fit::Fitter();
166  return fFitter;
167 
168 }
169 
170 ////////////////////////////////////////////////////////////////////////////////
171 /// Carry out the fit of the given function to the given histograms.
172 ///
173 /// If option "I" is used, the fit function will be averaged over the
174 /// bin (the default is to evaluate it simply at the bin center).
175 ///
176 /// If option "R" is used, the fit range will be taken from the fit
177 /// function (the default is to use the entire histogram).
178 ///
179 /// If option "S" a TFitResult object is returned and it can be used to obtain
180 /// additional fit information, like covariance or correlation matrix.
181 ///
182 /// Note that all parameter values, limits, and step sizes are copied
183 /// from the input fit function f1 (so they should be set before calling
184 /// this method. This is particularly relevant for the step sizes, taken
185 /// to be the "error" set on input, as a null step size usually fixes the
186 /// corresponding parameter. That is protected against, but in such cases
187 /// an arbitrary starting step size will be used, and the reliability of
188 /// the fit should be questioned). If parameters are to be fixed, this
189 /// should be done by specifying non-null parameter limits, with lower
190 /// limits larger than upper limits.
191 ///
192 /// On output, f1 contains the fitted parameters and errors, as well as
193 /// the number of degrees of freedom, and the goodness-of-fit estimator
194 /// as given by S. Baker and R. Cousins, Nucl. Instr. Meth. A221 (1984) 437.
195 
196 TFitResultPtr TBinomialEfficiencyFitter::Fit(TF1 *f1, Option_t* option)
197 {
198  TString opt = option;
199  opt.ToUpper();
200  fAverage = opt.Contains("I");
201  fRange = opt.Contains("R");
202  Bool_t verbose = opt.Contains("V");
203  Bool_t quiet = opt.Contains("Q");
204  Bool_t saveResult = opt.Contains("S");
205  if (!f1) return -1;
206  fFunction = (TF1*)f1;
207  Int_t i, npar;
208  npar = f1->GetNpar();
209  if (npar <= 0) {
210  Error("Fit", "function %s has illegal number of parameters = %d",
211  f1->GetName(), npar);
212  return -3;
213  }
214 
215  // Check that function has same dimension as histogram
216  if (!fNumerator || !fDenominator) {
217  Error("Fit","No numerator or denominator histograms set");
218  return -5;
219  }
220  if (f1->GetNdim() != fNumerator->GetDimension()) {
221  Error("Fit","function %s dimension, %d, does not match histogram dimension, %d",
222  f1->GetName(), f1->GetNdim(), fNumerator->GetDimension());
223  return -4;
224  }
225  // Check that the numbers of bins for the histograms match
226  if (fNumerator->GetNbinsX() != fDenominator->GetNbinsX() ||
227  (f1->GetNdim() > 1 && fNumerator->GetNbinsY() != fDenominator->GetNbinsY()) ||
228  (f1->GetNdim() > 2 && fNumerator->GetNbinsZ() != fDenominator->GetNbinsZ())) {
229  Error("Fit", "numerator and denominator histograms do not have identical numbers of bins");
230  return -6;
231  }
232 
233  // initialize the fitter
234 
235  if (!fFitter) {
236  fFitter = new ROOT::Fit::Fitter();
237  }
238 
239 
240  std::vector<ROOT::Fit::ParameterSettings> & parameters = fFitter->Config().ParamsSettings();
241  parameters.reserve(npar);
242  for (i = 0; i < npar; i++) {
243 
244  // assign an ARBITRARY starting error to ensure the parameter won't be fixed!
245  Double_t we = f1->GetParError(i);
246  if (we <= 0) we = 0.3*TMath::Abs(f1->GetParameter(i));
247  if (we == 0) we = 0.01;
248 
249  parameters.push_back(ROOT::Fit::ParameterSettings(f1->GetParName(i), f1->GetParameter(i), we) );
250 
251  Double_t plow, pup;
252  f1->GetParLimits(i,plow,pup);
253  // when a parameter is fixed is having plow and pup equal to the value (if this is not zero)
254  // we handle special case when fixed parameter has zero value (in that case plow=1 and pup =1 )
255  if (plow >= pup && (plow==f1->GetParameter(i) || pup==f1->GetParameter(i) ||
256  ( f1->GetParameter(i) == 0 && plow==1. && pup == 1.) ) ) {
257  parameters.back().Fix();
258  Info("Fit", "Fixing parameter %s to value %f", f1->GetParName(i), f1->GetParameter(i));
259  } else if (plow < pup) {
260  parameters.back().SetLimits(plow,pup);
261  Info("Fit", "Setting limits for parameter %s to [%f,%f]", f1->GetParName(i), plow,pup);
262  }
263  }
264 
265  // fcn must be set after setting the parameters
266  ROOT::Math::Functor fcnFunction(this, &TBinomialEfficiencyFitter::EvaluateFCN, npar);
267 
268  // set also model function in fitter to have it in FitResult
269  // in this way one can compute for example the confidence intervals
270  fFitter->SetFCN(static_cast<ROOT::Math::IMultiGenFunction &>(fcnFunction), ROOT::Math::WrappedMultiTF1(*f1));
271 
272  // in case default value of 1.0 is used
273  if (fFitter->Config().MinimizerOptions().ErrorDef() == 1.0 ) {
274  fFitter->Config().MinimizerOptions().SetErrorDef(0.5);
275  }
276 
277  if (verbose) {
278  fFitter->Config().MinimizerOptions().SetPrintLevel(3);
279  }
280  else if (quiet) {
281  fFitter->Config().MinimizerOptions().SetPrintLevel(0);
282  }
283 
284  // perform the actual fit
285 
286  fFitDone = kTRUE;
287  Bool_t status = fFitter->FitFCN();
288  if ( !status && !quiet)
289  Warning("Fit","Abnormal termination of minimization.");
290 
291 
292  //Store fit results in fitFunction
293  const ROOT::Fit::FitResult & fitResult = fFitter->Result();
294  if (!fitResult.IsEmpty() ) {
295  // set in f1 the result of the fit
296  f1->SetNDF(fitResult.Ndf() );
297 
298  //f1->SetNumberFitPoints(...); // this is set in ComputeFCN
299 
300  f1->SetParameters( &(fitResult.Parameters().front()) );
301  if ( int( fitResult.Errors().size()) >= f1->GetNpar() )
302  f1->SetParErrors( &(fitResult.Errors().front()) );
303 
304  f1->SetChisquare(2.*fitResult.MinFcnValue()); // store goodness of fit (Baker&Cousins)
305  f1->SetNDF(f1->GetNumberFitPoints()- fitResult.NFreeParameters());
306  Info("result"," chi2 %f ndf %d ",2.*fitResult.MinFcnValue(), fitResult.Ndf() );
307 
308  }
309  // create a new result class if needed
310  if (saveResult) {
311  TFitResult* fr = new TFitResult(fitResult);
312  TString name = TString::Format("TBinomialEfficiencyFitter_result_of_%s",f1->GetName() );
313  fr->SetName(name); fr->SetTitle(name);
314  return TFitResultPtr(fr);
315  }
316  else {
317  return TFitResultPtr(fitResult.Status() );
318  }
319 
320 }
321 
322 ////////////////////////////////////////////////////////////////////////////////
323 /// Compute the likelihood.
324 
325 void TBinomialEfficiencyFitter::ComputeFCN(Double_t& f, const Double_t* par)
326 {
327  int nDim = fDenominator->GetDimension();
328 
329  int xlowbin = fDenominator->GetXaxis()->GetFirst();
330  int xhighbin = fDenominator->GetXaxis()->GetLast();
331  int ylowbin = 0, yhighbin = 0, zlowbin = 0, zhighbin = 0;
332  if (nDim > 1) {
333  ylowbin = fDenominator->GetYaxis()->GetFirst();
334  yhighbin = fDenominator->GetYaxis()->GetLast();
335  if (nDim > 2) {
336  zlowbin = fDenominator->GetZaxis()->GetFirst();
337  zhighbin = fDenominator->GetZaxis()->GetLast();
338  }
339  }
340 
341  fFunction->SetParameters(par);
342 
343  if (fRange) {
344  double xmin, xmax, ymin, ymax, zmin, zmax;
345 
346  // This way to ensure that a minimum range chosen exactly at a
347  // bin boundary is far from elegant, but is hopefully adequate.
348 
349  if (nDim == 1) {
350  fFunction->GetRange(xmin, xmax);
351  xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
352  xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
353  } else if (nDim == 2) {
354  fFunction->GetRange(xmin, ymin, xmax, ymax);
355  xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
356  xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
357  ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
358  yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
359  } else if (nDim == 3) {
360  fFunction->GetRange(xmin, ymin, zmin, xmax, ymax, zmax);
361  xlowbin = fDenominator->GetXaxis()->FindBin(xmin);
362  xhighbin = fDenominator->GetXaxis()->FindBin(xmax);
363  ylowbin = fDenominator->GetYaxis()->FindBin(ymin);
364  yhighbin = fDenominator->GetYaxis()->FindBin(ymax);
365  zlowbin = fDenominator->GetZaxis()->FindBin(zmin);
366  zhighbin = fDenominator->GetZaxis()->FindBin(zmax);
367  }
368  }
369 
370  // The coding below is perhaps somewhat awkward -- but it is done
371  // so that 1D, 2D, and 3D cases can be covered in the same loops.
372 
373  f = 0.;
374 
375  Int_t npoints = 0;
376  Double_t nmax = 0;
377  for (int xbin = xlowbin; xbin <= xhighbin; ++xbin) {
378 
379  // compute the bin edges
380  Double_t xlow = fDenominator->GetXaxis()->GetBinLowEdge(xbin);
381  Double_t xup = fDenominator->GetXaxis()->GetBinLowEdge(xbin+1);
382 
383  for (int ybin = ylowbin; ybin <= yhighbin; ++ybin) {
384 
385  // compute the bin edges (if applicable)
386  Double_t ylow = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin) : 0;
387  Double_t yup = (nDim > 1) ? fDenominator->GetYaxis()->GetBinLowEdge(ybin+1) : 0;
388 
389  for (int zbin = zlowbin; zbin <= zhighbin; ++zbin) {
390 
391  // compute the bin edges (if applicable)
392  Double_t zlow = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin) : 0;
393  Double_t zup = (nDim > 2) ? fDenominator->GetZaxis()->GetBinLowEdge(zbin+1) : 0;
394 
395  int bin = fDenominator->GetBin(xbin, ybin, zbin);
396  Double_t nDen = fDenominator->GetBinContent(bin);
397  Double_t nNum = fNumerator->GetBinContent(bin);
398 
399  // count maximum value to use in the likelihood for inf
400  // i.e. a number much larger than the other terms
401  if (nDen> nmax) nmax = nDen;
402  if (nDen <= 0.) continue;
403  npoints++;
404 
405  // mu is the average of the function over the bin OR
406  // the function evaluated at the bin centre
407  // As yet, there is nothing to prevent mu from being
408  // outside the range <0,1> !!
409 
410  Double_t mu = 0;
411  switch (nDim) {
412  case 1:
413  mu = (fAverage) ?
414  fFunction->Integral(xlow, xup, fEpsilon)
415  / (xup-xlow) :
416  fFunction->Eval(fDenominator->GetBinCenter(bin));
417  break;
418  case 2:
419  {
420  mu = (fAverage) ?
421  ((TF2*)fFunction)->Integral(xlow, xup, ylow, yup, fEpsilon)
422  / ((xup-xlow)*(yup-ylow)) :
423  fFunction->Eval(fDenominator->GetXaxis()->GetBinCenter(xbin),
424  fDenominator->GetYaxis()->GetBinCenter(ybin));
425  }
426  break;
427  case 3:
428  {
429  mu = (fAverage) ?
430  ((TF3*)fFunction)->Integral(xlow, xup, ylow, yup, zlow, zup, fEpsilon)
431  / ((xup-xlow)*(yup-ylow)*(zup-zlow)) :
432  fFunction->Eval(fDenominator->GetXaxis()->GetBinCenter(xbin),
433  fDenominator->GetYaxis()->GetBinCenter(ybin),
434  fDenominator->GetZaxis()->GetBinCenter(zbin));
435  }
436  }
437 
438  // binomial formula (forgetting about the factorials)
439  if (nNum != 0.) {
440  if (mu > 0.)
441  f -= nNum * TMath::Log(mu*nDen/nNum);
442  else
443  f -= nmax * -1E30; // crossing our fingers
444  }
445  if (nDen - nNum != 0.) {
446  if (1. - mu > 0.)
447  f -= (nDen - nNum) * TMath::Log((1. - mu)*nDen/(nDen-nNum));
448  else
449  f -= nmax * -1E30; // crossing our fingers
450  }
451  }
452  }
453  }
454 
455  fFunction->SetNumberFitPoints(npoints);
456 }