Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TLimit.cxx
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Author: Christophe.Delaere@cern.ch 21/08/2002
3 
4 ///////////////////////////////////////////////////////////////////////////
5 //
6 // TLimit
7 //
8 // Class to compute 95% CL limits
9 //
10 // adapted from the mclimit code from Tom Junk (CLs method)
11 // see http://root.cern.ch/root/doc/TomJunk.pdf
12 // see http://cern.ch/thomasj/searchlimits/ecl.html
13 // see: Tom Junk,NIM A434, p. 435-443, 1999
14 //
15 // see also the following interesting references:
16 // Alex Read, "Presentation of search results: the CLs technique"
17 // Journal of Physics G: Nucl. Part. Phys. 28 2693-2704 (2002).
18 // http://www.iop.org/EJ/abstract/0954-3899/28/10/313/
19 //
20 // A nice article is also available in the CERN yellow report with the proceeding
21 // of the 2000 CERN workshop on confidence intervals.
22 //
23 // Alex Read, "Modified Frequentist Analysis of Search Results (The CLs Method)"
24 // CERN 2000-005 (30 May 2000)
25 //
26 // see note about: "Should I use TRolke, TFeldmanCousins, TLimit?"
27 // in the TRolke class description.
28 //
29 ///////////////////////////////////////////////////////////////////////////
30 
31 /** \class TLimit
32  \ingroup Hist
33  Algorithm to compute 95% C.L. limits using the Likelihood ratio
34  semi-bayesian method.
35 
36  Implemented by C. Delaere from the mclimit code written by Tom Junk [HEP-EX/9902006].
37  See [http://cern.ch/thomasj/searchlimits/ecl.html](http://cern.ch/thomasj/searchlimits/ecl.html) for more details.
38 
39  It takes signal, background and data histograms wrapped in a
40  TLimitDataSource as input and runs a set of Monte Carlo experiments in
41  order to compute the limits. If needed, inputs are fluctuated according
42  to systematics. The output is a TConfidenceLevel.
43 
44  The class TLimitDataSource takes the signal, background and data histograms as well as different
45  systematics sources to form the TLimit input.
46 
47  The class TConfidenceLevel represents the final result of the TLimit algorithm. It is created just after the
48  time-consuming part and can be stored in a TFile for further processing.
49  It contains light methods to return CLs, CLb and other interesting
50  quantities.
51 
52  The actual algorithm...
53 
54  From an input (TLimitDataSource) it produces an output TConfidenceLevel.
55  For this, nmc Monte Carlo experiments are performed.
56  As usual, the larger this number, the longer the compute time,
57  but the better the result.
58 
59  Supposing that there is a plotfile.root file containing 3 histograms
60  (signal, background and data), you can imagine doing things like:
61 
62 ~~~{.cpp}
63  TFile* infile=new TFile("plotfile.root","READ");
64  infile->cd();
65  TH1* sh=(TH1*)infile->Get("signal");
66  TH1* bh=(TH1*)infile->Get("background");
67  TH1* dh=(TH1*)infile->Get("data");
68  TLimitDataSource* mydatasource = new TLimitDataSource(sh,bh,dh);
69  TConfidenceLevel *myconfidence = TLimit::ComputeLimit(mydatasource,50000);
70  std::cout << " CLs : " << myconfidence->CLs() << std::endl;
71  std::cout << " CLsb : " << myconfidence->CLsb() << std::endl;
72  std::cout << " CLb : " << myconfidence->CLb() << std::endl;
73  std::cout << "< CLs > : " << myconfidence->GetExpectedCLs_b() << std::endl;
74  std::cout << "< CLsb > : " << myconfidence->GetExpectedCLsb_b() << std::endl;
75  std::cout << "< CLb > : " << myconfidence->GetExpectedCLb_b() << std::endl;
76  delete myconfidence;
77  delete mydatasource;
78  infile->Close();
79 ~~~
80  More information can still be found on [this page](http://cern.ch/aleph-proj-alphapp/doc/tlimit.html)
81  */
82 
83 #include "TLimit.h"
84 #include "TArrayD.h"
85 #include "TVectorD.h"
86 #include "TOrdCollection.h"
87 #include "TConfidenceLevel.h"
88 #include "TLimitDataSource.h"
89 #include "TRandom3.h"
90 #include "TH1.h"
91 #include "TObjArray.h"
92 #include "TMath.h"
93 #include "TIterator.h"
94 #include "TObjString.h"
95 #include "TClassTable.h"
96 #include "Riostream.h"
97 
98 ClassImp(TLimit);
99 
100 TArrayD *TLimit::fgTable = new TArrayD(0);
101 TOrdCollection *TLimit::fgSystNames = new TOrdCollection();
102 
103 TConfidenceLevel *TLimit::ComputeLimit(TLimitDataSource * data,
104  Int_t nmc, bool stat,
105  TRandom * generator)
106 {
107  // The final object returned...
108  TConfidenceLevel *result = new TConfidenceLevel(nmc);
109  // The random generator used...
110  TRandom *myrandom = generator ? generator : new TRandom3;
111  // Compute some total quantities on all the channels
112  Int_t maxbins = 0;
113  Double_t nsig = 0;
114  Double_t nbg = 0;
115  Int_t ncand = 0;
116  Int_t i;
117  for (i = 0; i <= data->GetSignal()->GetLast(); i++) {
118  maxbins = (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) > maxbins ?
119  (((TH1 *) (data->GetSignal()->At(i)))->GetNbinsX() + 2) : maxbins;
120  nsig += ((TH1 *) (data->GetSignal()->At(i)))->Integral();
121  nbg += ((TH1 *) (data->GetBackground()->At(i)))->Integral();
122  ncand += (Int_t) ((TH1 *) (data->GetCandidates()->At(i)))->Integral();
123  }
124  result->SetBtot(nbg);
125  result->SetStot(nsig);
126  result->SetDtot(ncand);
127  Double_t buffer = 0;
128  fgTable->Set(maxbins * (data->GetSignal()->GetLast() + 1));
129  for (Int_t channel = 0; channel <= data->GetSignal()->GetLast(); channel++)
130  for (Int_t bin = 0;
131  bin <= ((TH1 *) (data->GetSignal()->At(channel)))->GetNbinsX()+1;
132  bin++) {
133  Double_t s = (Double_t) ((TH1 *) (data->GetSignal()->At(channel)))->GetBinContent(bin);
134  Double_t b = (Double_t) ((TH1 *) (data->GetBackground()->At(channel)))->GetBinContent(bin);
135  Double_t d = (Double_t) ((TH1 *) (data->GetCandidates()->At(channel)))->GetBinContent(bin);
136  // Compute the value of the "-2lnQ" for the actual data
137  if ((b == 0) && (s > 0)) {
138  std::cout << "WARNING: Ignoring bin " << bin << " of channel "
139  << channel << " which has s=" << s << " but b=" << b << std::endl;
140  std::cout << " Maybe the MC statistic has to be improved..." << std::endl;
141  }
142  if ((s > 0) && (b > 0))
143  buffer += LogLikelihood(s, b, b, d);
144  // precompute the log(1+s/b)'s in an array to speed up computation
145  // background-free bins are set to have a maximum t.s. value
146  // for protection (corresponding to s/b of about 5E8)
147  if ((s > 0) && (b > 0))
148  fgTable->AddAt(LogLikelihood(s, b, b, 1), (channel * maxbins) + bin);
149  else if ((s > 0) && (b == 0))
150  fgTable->AddAt(20, (channel * maxbins) + bin);
151  }
152  result->SetTSD(buffer);
153  // accumulate MC experiments. Hold the test statistic function fixed, but
154  // fluctuate s and b within syst. errors for computing probabilities of
155  // having that outcome. (Alex Read's prescription -- errors are on the ensemble,
156  // not on the observed test statistic. This technique does not split outcomes.)
157  // keep the tstats as sum log(1+s/b). convert to -2lnQ when preparing the results
158  // (reason -- like to keep the < signs right)
159  Double_t *tss = new Double_t[nmc];
160  Double_t *tsb = new Double_t[nmc];
161  Double_t *lrs = new Double_t[nmc];
162  Double_t *lrb = new Double_t[nmc];
163  TLimitDataSource* tmp_src = (TLimitDataSource*)(data->Clone());
164  TLimitDataSource* tmp_src2 = (TLimitDataSource*)(data->Clone());
165  for (i = 0; i < nmc; i++) {
166  tss[i] = 0;
167  tsb[i] = 0;
168  lrs[i] = 0;
169  lrb[i] = 0;
170  // fluctuate signal and background
171  TLimitDataSource* fluctuated = Fluctuate(data, tmp_src, !i, myrandom, stat) ? tmp_src : data;
172  // make an independent set of fluctuations for reweighting pseudoexperiments
173  // it turns out using the same fluctuated ones for numerator and denominator
174  // is biased.
175  TLimitDataSource* fluctuated2 = Fluctuate(data, tmp_src2, false, myrandom, stat) ? tmp_src2 : data;
176 
177  for (Int_t channel = 0;
178  channel <= fluctuated->GetSignal()->GetLast(); channel++) {
179  for (Int_t bin = 0;
180  bin <=((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetNbinsX()+1;
181  bin++) {
182  if ((Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) != 0) {
183  // s+b hypothesis
184  Double_t rate = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin) +
185  (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
186  Double_t rand = myrandom->Poisson(rate);
187  tss[i] += rand * fgTable->At((channel * maxbins) + bin);
188  Double_t s = (Double_t) ((TH1 *) (fluctuated->GetSignal()->At(channel)))->GetBinContent(bin);
189  Double_t s2= (Double_t) ((TH1 *) (fluctuated2->GetSignal()->At(channel)))->GetBinContent(bin);
190  Double_t b = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
191  Double_t b2= (Double_t) ((TH1 *) (fluctuated2->GetBackground()->At(channel)))->GetBinContent(bin);
192  if ((s > 0) && (b2 > 0))
193  lrs[i] += LogLikelihood(s, b, b2, rand) - s - b + b2;
194  else if ((s > 0) && (b2 == 0))
195  lrs[i] += 20 * rand - s;
196  // b hypothesis
197  rate = (Double_t) ((TH1 *) (fluctuated->GetBackground()->At(channel)))->GetBinContent(bin);
198  rand = myrandom->Poisson(rate);
199  tsb[i] += rand * fgTable->At((channel * maxbins) + bin);
200  if ((s2 > 0) && (b > 0))
201  lrb[i] += LogLikelihood(s2, b2, b, rand) - s2 - b2 + b;
202  else if ((s > 0) && (b == 0))
203  lrb[i] += 20 * rand - s;
204  }
205  }
206  }
207  lrs[i] = TMath::Exp(lrs[i] < 710 ? lrs[i] : 710);
208  lrb[i] = TMath::Exp(lrb[i] < 710 ? lrb[i] : 710);
209  }
210  delete tmp_src;
211  delete tmp_src2;
212  // lrs and lrb are the LR's (no logs) = prob(s+b)/prob(b) for
213  // that choice of s and b within syst. errors in the ensemble. These are
214  // the MC experiment weights for relating the s+b and b PDF's of the unsmeared
215  // test statistic (in which cas one can use another test statistic if one likes).
216 
217  // Now produce the output object.
218  // The final quantities are computed on-demand form the arrays tss, tsb, lrs and lrb.
219  result->SetTSS(tss);
220  result->SetTSB(tsb);
221  result->SetLRS(lrs);
222  result->SetLRB(lrb);
223  if (!generator)
224  delete myrandom;
225  return result;
226 }
227 
228 bool TLimit::Fluctuate(TLimitDataSource * input, TLimitDataSource * output,
229  bool init, TRandom * generator, bool stat)
230 {
231  // initialisation: create a sorted list of all the names of systematics
232  if (init) {
233  // create a "map" with the systematics names
234  TIterator *errornames = input->GetErrorNames()->MakeIterator();
235  TObjArray *listofnames = 0;
236  delete fgSystNames;
237  fgSystNames = new TOrdCollection();
238  while ((listofnames = ((TObjArray *) errornames->Next()))) {
239  TObjString *name = 0;
240  TIterator *loniter = listofnames->MakeIterator();
241  while ((name = (TObjString *) (loniter->Next())))
242  if ((fgSystNames->IndexOf(name)) < 0)
243  fgSystNames->AddLast(name);
244  }
245  fgSystNames->Sort();
246  }
247  // if the output is not given, create it from the input
248  if (!output)
249  output = (TLimitDataSource*)(input->Clone());
250  // if there are no systematics, just returns the input as "fluctuated" output
251  if ((fgSystNames->GetSize() <= 0)&&(!stat)) {
252  return 0;
253  }
254  // if there are only stat, just fluctuate stats.
255  if (fgSystNames->GetSize() <= 0) {
256  output->SetOwner();
257  for (Int_t channel = 0; channel <= input->GetSignal()->GetLast(); channel++) {
258  TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
259  TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
260  if(stat)
261  for(int i=1; i<=newsignal->GetNbinsX(); i++) {
262  newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
263  }
264  newsignal->SetDirectory(0);
265  TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
266  TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
267  if(stat)
268  for(int i=1; i<=newbackground->GetNbinsX(); i++)
269  newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
270  newbackground->SetDirectory(0);
271  }
272  return 1;
273  }
274  // Find a choice for the random variation and
275  // re-toss all random numbers if any background or signal
276  // goes negative. (background = 0 is bad too, so put a little protection
277  // around it -- must have at least 10% of the bg estimate).
278  Bool_t retoss = kTRUE;
279  Double_t *serrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
280  Double_t *berrf = new Double_t[(input->GetSignal()->GetLast()) + 1];
281  do {
282  Double_t *toss = new Double_t[fgSystNames->GetSize()];
283  for (Int_t i = 0; i < fgSystNames->GetSize(); i++)
284  toss[i] = generator->Gaus(0, 1);
285  retoss = kFALSE;
286  for (Int_t channel = 0;
287  channel <= input->GetSignal()->GetLast();
288  channel++) {
289  serrf[channel] = 0;
290  berrf[channel] = 0;
291  for (Int_t bin = 0;
292  bin <((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->GetNrows();
293  bin++) {
294  serrf[channel] += ((TVectorD *) (input->GetErrorOnSignal()->At(channel)))->operator[](bin) *
295  toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
296  berrf[channel] += ((TVectorD *) (input->GetErrorOnBackground()->At(channel)))->operator[](bin) *
297  toss[fgSystNames->BinarySearch((TObjString*) (((TObjArray *) (input->GetErrorNames()->At(channel)))->At(bin)))];
298  }
299  if ((serrf[channel] < -1.0) || (berrf[channel] < -0.9)) {
300  retoss = kTRUE;
301  continue;
302  }
303  }
304  delete[]toss;
305  } while (retoss);
306  // adjust the fluctuated signal and background counts with a legal set
307  // of random fluctuations above.
308  output->SetOwner();
309  for (Int_t channel = 0; channel <= input->GetSignal()->GetLast();
310  channel++) {
311  TH1 *newsignal = (TH1*)(output->GetSignal()->At(channel));
312  TH1 *oldsignal = (TH1*)(input->GetSignal()->At(channel));
313  if(stat)
314  for(int i=1; i<=newsignal->GetNbinsX(); i++)
315  newsignal->SetBinContent(i,oldsignal->GetBinContent(i)+generator->Gaus(0,oldsignal->GetBinError(i)));
316  else
317  for(int i=1; i<=newsignal->GetNbinsX(); i++)
318  newsignal->SetBinContent(i,oldsignal->GetBinContent(i));
319  newsignal->Scale(1 + serrf[channel]);
320  newsignal->SetDirectory(0);
321  TH1 *newbackground = (TH1*)(output->GetBackground()->At(channel));
322  TH1 *oldbackground = (TH1*)(input->GetBackground()->At(channel));
323  if(stat)
324  for(int i=1; i<=newbackground->GetNbinsX(); i++)
325  newbackground->SetBinContent(i,oldbackground->GetBinContent(i)+generator->Gaus(0,oldbackground->GetBinError(i)));
326  else
327  for(int i=1; i<=newbackground->GetNbinsX(); i++)
328  newbackground->SetBinContent(i,oldbackground->GetBinContent(i));
329  newbackground->Scale(1 + berrf[channel]);
330  newbackground->SetDirectory(0);
331  }
332  delete[] serrf;
333  delete[] berrf;
334  return 1;
335 }
336 
337 TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
338  Int_t nmc, bool stat,
339  TRandom * generator)
340 {
341  // Compute limit.
342 
343  TLimitDataSource* lds = new TLimitDataSource(s,b,d);
344  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
345  delete lds;
346  return out;
347 }
348 
349 TConfidenceLevel *TLimit::ComputeLimit(TH1* s, TH1* b, TH1* d,
350  TVectorD* se, TVectorD* be, TObjArray* l,
351  Int_t nmc, bool stat,
352  TRandom * generator)
353 {
354  // Compute limit.
355 
356  TLimitDataSource* lds = new TLimitDataSource(s,b,d,se,be,l);
357  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
358  delete lds;
359  return out;
360 }
361 
362 TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
363  Int_t nmc,
364  bool stat,
365  TRandom * generator)
366 {
367  // Compute limit.
368 
369  TH1D* sh = new TH1D("__sh","__sh",1,0,2);
370  sh->Fill(1,s);
371  TH1D* bh = new TH1D("__bh","__bh",1,0,2);
372  bh->Fill(1,b);
373  TH1D* dh = new TH1D("__dh","__dh",1,0,2);
374  dh->Fill(1,d);
375  TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh);
376  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
377  delete lds;
378  delete sh;
379  delete bh;
380  delete dh;
381  return out;
382 }
383 
384 TConfidenceLevel *TLimit::ComputeLimit(Double_t s, Double_t b, Int_t d,
385  TVectorD* se, TVectorD* be, TObjArray* l,
386  Int_t nmc,
387  bool stat,
388  TRandom * generator)
389 {
390  // Compute limit.
391 
392  TH1D* sh = new TH1D("__sh","__sh",1,0,2);
393  sh->Fill(1,s);
394  TH1D* bh = new TH1D("__bh","__bh",1,0,2);
395  bh->Fill(1,b);
396  TH1D* dh = new TH1D("__dh","__dh",1,0,2);
397  dh->Fill(1,d);
398  TLimitDataSource* lds = new TLimitDataSource(sh,bh,dh,se,be,l);
399  TConfidenceLevel* out = ComputeLimit(lds,nmc,stat,generator);
400  delete lds;
401  delete sh;
402  delete bh;
403  delete dh;
404  return out;
405 }
406 
407 Double_t TLimit::LogLikelihood(Double_t s, Double_t b, Double_t b2, Double_t d)
408 {
409  // Compute LogLikelihood (static function)
410 
411  return d*TMath::Log((s+b)/b2);
412 }