Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
DataSetFactory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSetFactory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2006: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSetFactory
30 #define ROOT_TMVA_DataSetFactory
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSetFactory //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
#include <map>
#include <utility>
#include <vector>

#include "TString.h"
#include "TTree.h"
#include "TCut.h"
#include "TTreeFormula.h"
#include "TMatrixDfwd.h"
#include "TPrincipal.h"
#include "TRandom3.h"

#include "TMVA/Types.h"
#include "TMVA/VariableInfo.h"
#include "TMVA/Event.h"
53 
54 namespace TMVA {
55 
56  class DataSet;
57  class DataSetInfo;
58  class DataInputHandler;
59  class TreeInfo;
60  class MsgLogger;
61 
62 // =============== maybe move these elsewhere (e.g. into the tools)
63 
64  // =============== functors =======================
65 
// Deleting functor: invoking it on a pointer destroys the pointee with
// `delete` and hands back a reference to the functor itself.
// Meant for algorithms such as std::for_each applied to a range of raw
// pointers whose pointees should be destroyed.
template <class T>
struct DeleteFunctor_t {
   // Destroys `*ptr` (a null pointer is a no-op for `delete`).
   DeleteFunctor_t& operator()(const T* ptr)
   {
      delete ptr;
      return *this;
   }
};
75 
76  template<class T>
77  DeleteFunctor_t<const T> DeleteFunctor()
78  {
79  return DeleteFunctor_t<const T>();
80  }
81 
82 
// Stateful generator functor. It is seeded with `start`; every call returns
// the current counter value and then advances the counter by one
// (post-increment semantics).
template <typename T>
class Increment {
public:
   Increment(T start) : value(start) {}

   // Yields the current value, then steps the internal counter.
   T operator()() { return value++; }

private:
   T value; // next value to hand out
};
92 
93 
94 
// Identity functor: operator() returns a copy of the argument it is given.
// Exposes `argument_type` so it can be plugged into the compose_* adaptors
// declared in this header.
template <typename F>
class null_t
{
public:
   using argument_type = F;

   // Returns argF unchanged (by value).
   F operator()(const F& argF) const { return argF; }
};
107 
108  template <typename F>
109  inline null_t<F> null() {
110  return null_t<F>();
111  }
112 
113 
114 
// Binary composition adaptor: operator()(a, b) evaluates f(g(a), h(b)).
//
// Requirements on the template arguments:
//   - F exposes a nested `result_type`,
//   - G and H each expose a nested `argument_type`.
//
// The nested typedefs first_argument_type / second_argument_type /
// result_type are declared directly instead of being inherited from
// std::binary_function, which was deprecated in C++11 and removed in C++17.
//
// NOTE: the adaptor stores *references* to the three functors, it does not
// copy them. The functors must therefore outlive the adaptor; when it is
// built from temporaries it must only be used within the same
// full-expression.
template <typename F, typename G, typename H>
class compose_binary_t
{
public:
   typedef typename G::argument_type first_argument_type;
   typedef typename H::argument_type second_argument_type;
   typedef typename F::result_type   result_type;

   compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
   {
   }

   // Applies g to the first argument, h to the second, and feeds both
   // results to f.
   result_type operator()(const first_argument_type& argG,
                          const second_argument_type& argH) const
   {
      return f(g(argG), h(argH));
   }

private:
   const F& f; // f(g(argG),h(argH))
   const G& g;
   const H& h;
};
135 
136  template <typename F, typename G, typename H>
137  inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
138  return compose_binary_t<F,G,H>(_f,_g,_h);
139  }
140 
141 
142 
143 
// Unary composition adaptor: operator()(a) evaluates f(g(a)).
//
// Requirements on the template arguments:
//   - F exposes a nested `result_type`,
//   - G exposes a nested `argument_type`.
//
// The nested typedefs argument_type / result_type are declared directly
// instead of being inherited from std::unary_function, which was deprecated
// in C++11 and removed in C++17.
//
// NOTE: the adaptor stores *references* to both functors, it does not copy
// them. The functors must therefore outlive the adaptor; when it is built
// from temporaries it must only be used within the same full-expression.
template <typename F, typename G>
class compose_unary_t
{
public:
   typedef typename G::argument_type argument_type;
   typedef typename F::result_type   result_type;

   compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
   {
   }

   // Applies g to the argument and feeds the result to f.
   result_type operator()(const argument_type& argG) const
   {
      return f(g(argG));
   }

private:
   const F& f; // f(g(argG))
   const G& g;
};
161 
162  template <typename F, typename G>
163  inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
164  return compose_unary_t<F,G>(_f,_g);
165  }
166 
167  // =============== functors =======================
168 
169 
170  // =========================================================
171 
172 
173  class DataSetFactory:public TObject {
174 
175  typedef std::vector<Event* > EventVector;
176  typedef std::vector< EventVector > EventVectorOfClasses;
177  typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
178  typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
179 
180  typedef std::vector< Double_t > ValuePerClass;
181  typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
182 
183  class EventStats {
184  public:
185  Int_t nTrainingEventsRequested;
186  Int_t nTestingEventsRequested;
187  Float_t TrainTestSplitRequested;
188  Int_t nInitialEvents;
189  Int_t nEvBeforeCut;
190  Int_t nEvAfterCut;
191  Float_t nWeEvBeforeCut;
192  Float_t nWeEvAfterCut;
193  Double_t nNegWeights;
194  Float_t* varAvLength;//->
195  EventStats():
196  nTrainingEventsRequested(0),
197  nTestingEventsRequested(0),
198  TrainTestSplitRequested(0),
199  nInitialEvents(0),
200  nEvBeforeCut(0),
201  nEvAfterCut(0),
202  nWeEvBeforeCut(0),
203  nWeEvAfterCut(0),
204  nNegWeights(0),
205  varAvLength(0)
206  {}
207  ~EventStats() { delete[] varAvLength; }
208  Float_t cutScaling() const { return Float_t(nEvAfterCut)/nEvBeforeCut; }
209  };
210 
211  typedef std::vector< int > NumberPerClass;
212  typedef std::vector< EventStats > EvtStatsPerClass;
213 
214  public:
215 
216  ~DataSetFactory();
217 
218  DataSetFactory();
219 
220  DataSet* CreateDataSet( DataSetInfo &, DataInputHandler& );
221  protected:
222 
223 
224  DataSet* BuildInitialDataSet( DataSetInfo&, TMVA::DataInputHandler& );
225  DataSet* BuildDynamicDataSet( DataSetInfo& );
226 
227  // ---------- new versions
228  void BuildEventVector ( DataSetInfo& dsi,
229  DataInputHandler& dataInput,
230  EventVectorOfClassesOfTreeType& eventsmap,
231  EvtStatsPerClass& eventCounts);
232 
233  DataSet* MixEvents ( DataSetInfo& dsi,
234  EventVectorOfClassesOfTreeType& eventsmap,
235  EvtStatsPerClass& eventCounts,
236  const TString& splitMode,
237  const TString& mixMode,
238  const TString& normMode,
239  UInt_t splitSeed);
240 
241  void RenormEvents ( DataSetInfo& dsi,
242  EventVectorOfClassesOfTreeType& eventsmap,
243  const EvtStatsPerClass& eventCounts,
244  const TString& normMode );
245 
246  void InitOptions ( DataSetInfo& dsi,
247  EvtStatsPerClass& eventsmap,
248  TString& normMode, UInt_t& splitSeed,
249  TString& splitMode, TString& mixMode);
250 
251 
252  // ------------------------
253 
254  // auxiliary functions to compute correlations
255  TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
256  TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
257  void CalcMinMax ( DataSet*, DataSetInfo& dsi );
258 
259  // resets branch addresses to current event
260  void ResetBranchAndEventAddresses( TTree* );
261  void ResetCurrentTree() { fCurrentTree = 0; }
262  void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
263  Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
264 
265  // verbosity
266  Bool_t Verbose() { return fVerbose; }
267 
268  // data members
269 
270  // verbosity
271  Bool_t fVerbose; // Verbosity
272  TString fVerboseLevel; // VerboseLevel
273 
274  // Printing
275  Bool_t fCorrelations = kFALSE; // Whether to print correlations or not
276  Bool_t fComputeCorrelations = kFALSE; // Whether to force computation of correlations or not
277 
278  Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
279 
280  // the event
281  TTree* fCurrentTree; // the tree, events are currently read from
282  UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
283 
284  // the formulas for reading the original tree
285  std::vector<TTreeFormula*> fInputFormulas; // input variables
286  std::vector<std::pair<TTreeFormula*, Int_t>> fInputTableFormulas; //! input variables expression for arrays
287  std::vector<TTreeFormula *> fTargetFormulas; // targets
288  std::vector<TTreeFormula*> fCutFormulas; // cuts
289  std::vector<TTreeFormula*> fWeightFormula; // weights
290  std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
291 
292  MsgLogger* fLogger; //! message logger
293  MsgLogger& Log() const { return *fLogger; }
294  public:
295  ClassDef(DataSetFactory, 2);
296  };
297 }
298 
299 #endif