Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
PDEFoamDecisionTree.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Classes: PDEFoamDecisionTree *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation of decision tree like PDEFoam *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
17  * Alexander Voigt - TU Dresden, Germany *
18  * Peter Speckmayer - CERN, Switzerland *
19  * *
20  * Copyright (c) 2010: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 /*! \class TMVA::PDEFoamDecisionTree
30 \ingroup TMVA
31 
32 This PDEFoam variant acts like a decision tree and stores in every
33 cell the discriminant
34 
35  D = #events with given class / total number of events
36 
37 as well as the statistical error on the discriminant. It therefore
38 acts as a discriminant estimator. The decision tree-like behaviour
39 is achieved by overriding PDEFoamDiscriminant::Explore() to use a
40 decision tree-like cell splitting algorithm (given a separation
41 type).
42 
43 This PDEFoam variant should be booked together with the
44 PDEFoamDecisionTreeDensity density estimator, which returns the
45 events in a cell without sampling.
46 */
47 
50 
51 #include "TMVA/MsgLogger.h"
52 #include "TMVA/PDEFoamCell.h"
54 #include "TMVA/PDEFoamVect.h"
55 #include "TMVA/SeparationBase.h"
56 #include "TMVA/Types.h"
57 #include "TMVA/Volume.h"
58 
59 #include "Rtypes.h"
60 #include "TH1D.h"
61 
62 class TString;
63 
64 ClassImp(TMVA::PDEFoamDecisionTree);
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// Default constructor for streamer, user should not use it.
68 
69 TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree()
70 : PDEFoamDiscriminant()
71  , fSepType(NULL)
72 {
73 }
74 
75 ////////////////////////////////////////////////////////////////////////////////
76 /// Parameters:
77 ///
78 /// - name - name of the foam
79 ///
80 /// - sepType - separation type used for the cell splitting (will be
81 /// deleted in the destructor)
82 ///
83 /// - cls - class to consider as signal when calculating the purity
84 
85 TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const TString& name, SeparationBase *sepType, UInt_t cls)
86  : PDEFoamDiscriminant(name, cls)
87  , fSepType(sepType)
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// Copy Constructor NOT IMPLEMENTED (NEVER USED)
93 
94 TMVA::PDEFoamDecisionTree::PDEFoamDecisionTree(const PDEFoamDecisionTree &from)
95  : PDEFoamDiscriminant(from)
96  , fSepType(from.fSepType)
97 {
98  Log() << kFATAL << "COPY CONSTRUCTOR NOT IMPLEMENTED" << Endl;
99 }
100 
101 ////////////////////////////////////////////////////////////////////////////////
102 /// Destructor
103 /// deletes fSepType
104 
105 TMVA::PDEFoamDecisionTree::~PDEFoamDecisionTree()
106 {
107  if (fSepType)
108  delete fSepType;
109 }
110 
111 ////////////////////////////////////////////////////////////////////////////////
112 /// Internal subprogram used by Create. It explores newly defined
113 /// cell with according to the decision tree logic. The separation
114 /// set via the 'sepType' option in the constructor.
115 ///
116 /// The optimal division point for eventual future cell division is
117 /// determined/recorded. Note that links to parents and initial
118 /// volume = 1/2 parent has to be already defined prior to calling
119 /// this routine.
120 ///
121 /// Note, that according to the decision tree logic, a cell is only
122 /// split, if the number of (unweighted) events in each daughter
123 /// cell is greater than fNmin.
124 
125 void TMVA::PDEFoamDecisionTree::Explore(PDEFoamCell *cell)
126 {
127  if (!cell)
128  Log() << kFATAL << "<DTExplore> Null pointer given!" << Endl;
129 
130  // create edge histograms
131  std::vector<TH1D*> hsig, hbkg, hsig_unw, hbkg_unw;
132  hsig.reserve(fDim);
133  hbkg.reserve(fDim);
134  hsig_unw.reserve(fDim);
135  hbkg_unw.reserve(fDim);
136  for (Int_t idim = 0; idim < fDim; idim++) {
137  hsig.push_back(new TH1D(Form("hsig_%i", idim),
138  Form("signal[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
139  hbkg.push_back(new TH1D(Form("hbkg_%i", idim),
140  Form("background[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
141  hsig_unw.push_back(new TH1D(Form("hsig_unw_%i", idim),
142  Form("signal_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
143  hbkg_unw.push_back(new TH1D(Form("hbkg_unw_%i", idim),
144  Form("background_unw[%i]", idim), fNBin, fXmin[idim], fXmax[idim]));
145  }
146 
147  // get cell position and size
148  PDEFoamVect cellSize(GetTotDim()), cellPosi(GetTotDim());
149  cell->GetHcub(cellPosi, cellSize);
150 
151  // determine lower and upper cell bound
152  std::vector<Double_t> lb(GetTotDim()); // lower bound
153  std::vector<Double_t> ub(GetTotDim()); // upper bound
154  for (Int_t idim = 0; idim < GetTotDim(); idim++) {
155  lb[idim] = VarTransformInvers(idim, cellPosi[idim] - std::numeric_limits<float>::epsilon());
156  ub[idim] = VarTransformInvers(idim, cellPosi[idim] + cellSize[idim] + std::numeric_limits<float>::epsilon());
157  }
158 
159  // fDistr must be of type PDEFoamDecisionTreeDensity*
160  PDEFoamDecisionTreeDensity *distr = dynamic_cast<PDEFoamDecisionTreeDensity*>(fDistr);
161  if (distr == NULL)
162  Log() << kFATAL << "<PDEFoamDecisionTree::Explore>: cast failed: "
163  << "PDEFoamDensityBase* --> PDEFoamDecisionTreeDensity*" << Endl;
164 
165  // create TMVA::Volume object needed for searching within the BST
166  TMVA::Volume volume(&lb, &ub);
167 
168  // fill the signal and background histograms for the given volume
169  distr->FillHistograms(volume, hsig, hbkg, hsig_unw, hbkg_unw);
170 
171  // ------ determine the best division edge
172  Double_t xBest = 0.5; // best division point
173  Int_t kBest = -1; // best split dimension
174  Double_t maxGain = -1.0; // maximum gain
175  Double_t nTotS = hsig.at(0)->Integral(0, hsig.at(0)->GetNbinsX() + 1);
176  Double_t nTotB = hbkg.at(0)->Integral(0, hbkg.at(0)->GetNbinsX() + 1);
177  Double_t nTotS_unw = hsig_unw.at(0)->Integral(0, hsig_unw.at(0)->GetNbinsX() + 1);
178  Double_t nTotB_unw = hbkg_unw.at(0)->Integral(0, hbkg_unw.at(0)->GetNbinsX() + 1);
179 
180  for (Int_t idim = 0; idim < fDim; ++idim) {
181  Double_t nSelS = hsig.at(idim)->GetBinContent(0);
182  Double_t nSelB = hbkg.at(idim)->GetBinContent(0);
183  Double_t nSelS_unw = hsig_unw.at(idim)->GetBinContent(0);
184  Double_t nSelB_unw = hbkg_unw.at(idim)->GetBinContent(0);
185  for (Int_t jLo = 1; jLo < fNBin; jLo++) {
186  nSelS += hsig.at(idim)->GetBinContent(jLo);
187  nSelB += hbkg.at(idim)->GetBinContent(jLo);
188  nSelS_unw += hsig_unw.at(idim)->GetBinContent(jLo);
189  nSelB_unw += hbkg_unw.at(idim)->GetBinContent(jLo);
190 
191  // proceed if total number of events in left and right cell
192  // is greater than fNmin
193  if (!((nSelS_unw + nSelB_unw) >= GetNmin() &&
194  (nTotS_unw - nSelS_unw + nTotB_unw - nSelB_unw) >= GetNmin()))
195  continue;
196 
197  Double_t xLo = 1.0 * jLo / fNBin;
198 
199  // calculate separation gain
200  Double_t gain = fSepType->GetSeparationGain(nSelS, nSelB, nTotS, nTotB);
201 
202  if (gain >= maxGain) {
203  maxGain = gain;
204  xBest = xLo;
205  kBest = idim;
206  }
207  } // jLo
208  } // idim
209 
210  if (kBest >= fDim || kBest < 0) {
211  // No best division edge found! One must ensure, that this cell
212  // is not chosen for splitting in PeekMax(). But since in
213  // PeekMax() it is ensured that cell->GetDriv() > epsilon, one
214  // should set maxGain to -1.0 (or even 0.0?) here.
215  maxGain = -1.0;
216  }
217 
218  // set cell properties
219  cell->SetBest(kBest);
220  cell->SetXdiv(xBest);
221  if (nTotB + nTotS > 0)
222  cell->SetIntg(nTotS / (nTotB + nTotS));
223  else
224  cell->SetIntg(0.0);
225  cell->SetDriv(maxGain);
226  cell->CalcVolume();
227 
228  // set cell element 0 (total number of events in cell) during
229  // build-up
230  if (GetNmin() > 0)
231  SetCellElement(cell, 0, nTotS + nTotB);
232 
233  // clean up
234  for (UInt_t ih = 0; ih < hsig.size(); ih++) delete hsig.at(ih);
235  for (UInt_t ih = 0; ih < hbkg.size(); ih++) delete hbkg.at(ih);
236  for (UInt_t ih = 0; ih < hsig_unw.size(); ih++) delete hsig_unw.at(ih);
237  for (UInt_t ih = 0; ih < hbkg_unw.size(); ih++) delete hbkg_unw.at(ih);
238 }