Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
KDEKernel.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Asen Christov
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : TMVA::KDEKernel *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Asen Christov <christov@physik.uni-freiburg.de> - Freiburg U., Germany *
15  * *
16  * Copyright (c) 2005: *
17  * CERN, Switzerland *
18  * MPI-K Heidelberg, Germany *
19  * Freiburg U., Germany *
20  * *
21  * Redistribution and use in source and binary forms, with or without *
22  * modification, are permitted according to the terms listed in LICENSE *
23  * (http://tmva.sourceforge.net/LICENSE) *
24  **********************************************************************************/
25 
26 /*! \class TMVA::KDEKernel
27 \ingroup TMVA
28 
29 KDE Kernel for "smoothing" the PDFs.
30 
31 */
32 
33 #include "TH1.h"
34 #include "TH1F.h"
35 #include "TF1.h"
36 
37 #include "TMath.h"
38 
39 #include "TMVA/KDEKernel.h"
40 #include "TMVA/MsgLogger.h"
41 #include "TMVA/Types.h"
42 
43 ClassImp(TMVA::KDEKernel);
44 
45 ////////////////////////////////////////////////////////////////////////////////
46 /// constructor
47 /// sanity check
48 
49 TMVA::KDEKernel::KDEKernel( EKernelIter kiter, const TH1 *hist, Float_t lower_edge, Float_t upper_edge,
50  EKernelBorder kborder, Float_t FineFactor )
51 : fSigma( 1. ),
52  fIter ( kiter ),
53  fLowerEdge (lower_edge ),
54  fUpperEdge (upper_edge),
55  fFineFactor ( FineFactor ),
56  fKernel_integ ( 0 ),
57  fKDEborder ( kborder ),
58  fLogger( new MsgLogger("KDEKernel") )
59 {
60  if (hist == NULL) {
61  Log() << kFATAL << "Called without valid histogram pointer (hist)!" << Endl;
62  }
63 
64  fHist = (TH1F*)hist->Clone();
65  fFirstIterHist = (TH1F*)hist->Clone();
66  fFirstIterHist->Reset(); // now it is empty but with the proper binning
67  fSigmaHist = (TH1F*)hist->Clone();
68  fSigmaHist->Reset(); // now fSigmaHist is empty but with the proper binning
69 
70  fHiddenIteration=false;
71 }
72 
73 ////////////////////////////////////////////////////////////////////////////////
74 /// destructor
75 
76 TMVA::KDEKernel::~KDEKernel()
77 {
78  if (fHist != NULL) delete fHist;
79  if (fFirstIterHist != NULL) delete fFirstIterHist;
80  if (fSigmaHist != NULL) delete fSigmaHist;
81  if (fKernel_integ != NULL) delete fKernel_integ;
82  delete fLogger;
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// when using Gaussian as Kernel function this is faster way to calculate the integrals
87 
88 Double_t GaussIntegral(Double_t *x, Double_t *par)
89 {
90  if ( (par[1]<=0) || (x[0]>x[1])) return -1.;
91 
92  Float_t xs1=(x[0]-par[0])/par[1];
93  Float_t xs2=(x[1]-par[0])/par[1];
94 
95  if (xs1==0) {
96  if (xs2==0) return 0.;
97  if (xs2>0 ) return 0.5*TMath::Erf(xs2);
98  }
99  if (xs2==0) return 0.5*TMath::Erf(TMath::Abs(xs1));
100  if (xs1>0) return 0.5*(TMath::Erf(xs2)-TMath::Erf(xs1));
101  if (xs1<0) {
102  if (xs2>0 ) return 0.5*(TMath::Erf(xs2)+TMath::Erf(TMath::Abs(xs1)));
103  else return 0.5*(TMath::Erf(TMath::Abs(xs1))-TMath::Erf(TMath::Abs(xs2)));
104  }
105  return -1.;
106 }
107 
108 ////////////////////////////////////////////////////////////////////////////////
109 /// fIter == 1 ---> nonadaptive KDE
110 /// fIter == 2 ---> adaptive KDE
111 
112 void TMVA::KDEKernel::SetKernelType( EKernelType ktype )
113 {
114  if (ktype == kGauss) {
115 
116  // i.e. gauss kernel
117  //
118  // this is going to be done for both (nonadaptive KDE and adaptive KDE)
119  // for nonadaptive KDE this is the only = final thing to do
120  // for adaptive KDE this is going to be used in the first (hidden) iteration
121  fKernel_integ = new TF1("GaussIntegral",GaussIntegral,fLowerEdge,fUpperEdge,4);
122  fSigma = ( TMath::Sqrt(2.0)
123  *TMath::Power(4./3., 0.2)
124  *fHist->GetRMS()
125  *TMath::Power(fHist->Integral(), -0.2) );
126  // this formula for sigma is valid for Gaussian Kernel function (nonadaptive KDE).
127  // formula found in:
128  // Multivariate Density Estimation, Theory, Practice and Visualization D. W. SCOTT, 1992 New York, Wiley
129  if (fSigma <= 0 ) {
130  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
131  }
132  }
133 
134  if (fIter == kAdaptiveKDE) {
135 
136  // this is done only for adaptive KDE
137 
138  // fill a temporary histo using nonadaptive KDE
139  // this histo is identical with the final output when using only nonadaptive KDE
140  fHiddenIteration=true;
141 
142  Float_t histoLowEdge=fHist->GetBinLowEdge(1);
143  Float_t histoUpperEdge=fHist->GetBinLowEdge(fHist->GetNbinsX()+1);
144 
145  for (Int_t i=1;i<fHist->GetNbinsX();i++) {
146  // loop over the bins of the original histo
147  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
148  // loop over the bins of the PDF histo and fill it
149  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
150  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
151  fFirstIterHist->GetBinLowEdge(j+1),
152  fHist->GetBinCenter(i),
153  i)
154  );
155  }
156  if (fKDEborder == 3) { // mirror the samples and fill them again
157  // in order to save time do the mirroring only for the first (the lower) 1/5 of the histo to the left;
158  // and the last (the higher) 1/5 of the histo to the right.
159  // the middle part of the histo, which is not mirrored, has no influence on the border effects anyway ...
160  if (i < fHist->GetNbinsX()/5 ) { // the first (the lower) 1/5 of the histo
161  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
162  // loop over the bins of the PDF histo and fill it
163  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
164  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
165  fFirstIterHist->GetBinLowEdge(j+1),
166  2*histoLowEdge-fHist->GetBinCenter(i), // mirroring to the left
167  i)
168  );
169  }
170  }
171  if (i > 4*fHist->GetNbinsX()/5) { // the last (the higher) 1/5 of the histo
172  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
173  // loop over the bins of the PDF histo and fill it
174  fFirstIterHist->AddBinContent(j,fHist->GetBinContent(i)*
175  this->GetBinKernelIntegral(fFirstIterHist->GetBinLowEdge(j),
176  fFirstIterHist->GetBinLowEdge(j+1),
177  2*histoUpperEdge-fHist->GetBinCenter(i), // mirroring to the right
178  i)
179  );
180  }
181  }
182  }
183  }
184 
185  fFirstIterHist->SetEntries(fHist->GetEntries()); //set the number of entries to be the same as the original histo
186 
187  // do "function like" integration = sum of (bin_width*bin_content):
188  Float_t integ=0;
189  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++)
190  integ+=fFirstIterHist->GetBinContent(j)*fFirstIterHist->GetBinWidth(j);
191  fFirstIterHist->Scale(1./integ);
192 
193  fHiddenIteration=false;
194 
195  // OK, now we have the first iteration,
196  // next: calculate the Sigmas (Widths) for the second (adaptive) iteration
197  // based on the output of the first iteration
198  // these Sigmas will be stored in histo called fSigmaHist
199  for (Int_t j=1;j<fFirstIterHist->GetNbinsX();j++) {
200  // loop over the bins of the PDF histo and fill fSigmaHist
201  if (fSigma*TMath::Sqrt(1.0/fFirstIterHist->GetBinContent(j)) <= 0 ) {
202  Log() << kFATAL << "<SetKernelType> KDE sigma has invalid value ( <=0 ) !" << Endl;
203  }
204 
205  fSigmaHist->SetBinContent(j,fFineFactor*fSigma/TMath::Sqrt(fFirstIterHist->GetBinContent(j)));
206  }
207  }
208 
209  if (fKernel_integ ==0 ) {
210  Log() << kFATAL << "KDE kernel not correctly initialized!" << Endl;
211  }
212 }
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// calculates the integral of the Kernel
216 
217 Float_t TMVA::KDEKernel::GetBinKernelIntegral( Float_t lowr, Float_t highr, Float_t mean, Int_t binnum )
218 {
219  if ((fIter == kNonadaptiveKDE) || fHiddenIteration )
220  fKernel_integ->SetParameters(mean,fSigma); // non adaptive KDE
221  else if ((fIter == kAdaptiveKDE) && !fHiddenIteration )
222  fKernel_integ->SetParameters(mean,fSigmaHist->GetBinContent(binnum)); // adaptive KDE
223 
224  if ( fKDEborder == 2 ) { // renormalization of the kernel function
225  Float_t renormFactor=1.0/fKernel_integ->Eval(fLowerEdge,fUpperEdge);
226  return (renormFactor*fKernel_integ->Eval(lowr,highr));
227  }
228 
229  // the RenormFactor takes care about the "border" effects, i.e. sets the
230  // integral to one inside the histogram borders
231  return (fKernel_integ->Eval(lowr,highr));
232 }
233