Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
DataLoader.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag, Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
3 //NOTE: Based on TMVA::Factory
4 
5 /**********************************************************************************
6  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7  * Package: TMVA *
8  * Class : DataLoader *
9  * Web : http://tmva.sourceforge.net *
10  * *
11  * Description: *
12  * This is a class to load datasets into every booked method *
13  * *
14  * Authors (alphabetical): *
15  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
16  * Omar Zapata <andresete.chaos@gmail.com> - ITM/UdeA, Colombia *
17  * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
18  * *
19  * Copyright (c) 2005-2011: *
20  * CERN, Switzerland *
21  * ITM/UdeA, Colombia *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #ifndef ROOT_TMVA_DataLoader
29 #define ROOT_TMVA_DataLoader
30 
31 #include <string>
32 #include <vector>
33 #include <map>
34 #include "TCut.h"
35 
36 #include "TMVA/Configurable.h"
37 #include "TMVA/Types.h"
38 #include "TMVA/DataSet.h"
39 
40 class TFile;
41 class TTree;
42 class TH2;
43 
44 namespace TMVA {
45 
46  class CvSplit;
47  class DataInputHandler;
48  class DataSetInfo;
49  class DataSetManager;
50  class VariableTransformBase;
51 
52  class DataLoader : public Configurable {
53  public:
54  // Declares the interface for registering variables, targets, spectators, cuts, weights and input trees/events, and for preparing training/test splits.
55  DataLoader(TString thedlName="default");
56  // NOTE(review): the single-argument constructor above is not `explicit`, so a TString converts implicitly to a DataLoader — confirm this is intended.
57  // virtual destructor (declared here; the body is not part of this header)
58  virtual ~DataLoader();
59 
60 
61  // add events to training and testing trees
62  void AddSignalTrainingEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
63  void AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight = 1.0 );
64  void AddSignalTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
65  void AddBackgroundTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
66  void AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight );
67  void AddTestEvent ( const TString& className, const std::vector<Double_t>& event, Double_t weight );
68  void AddEvent ( const TString& className, Types::ETreeType tt, const std::vector<Double_t>& event, Double_t weight );
69  Bool_t UserAssignEvents(UInt_t clIndex);
70  TTree* CreateEventAssignTrees( const TString& name );
71 
72  DataSetInfo& AddDataSet( DataSetInfo& );
73  DataSetInfo& AddDataSet( const TString& );
74  DataSetInfo& GetDataSetInfo();
75  DataLoader* VarTransform(TString trafoDefinition);
76 
77  // special case: signal/background
78 
79  // Data input related
80  void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName,
81  Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 );
82  void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut );
83  // Set input trees at once
84  void SetInputTrees( TTree* signal, TTree* background,
85  Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ;
86 
87  void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
88  void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
89  void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype );
90 
91  // ... deprecated, kept for backwards compatibility
92  void SetSignalTree( TTree* signal, Double_t weight=1.0);
93 
94  void AddBackgroundTree( TTree* background, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
95  void AddBackgroundTree( TString datFileB, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
96  void AddBackgroundTree( TTree* background, Double_t weight, const TString & treetype );
97 
98  // ... deprecated, kept for backwards compatibility
99  void SetBackgroundTree( TTree* background, Double_t weight=1.0 );
100 
101  void SetSignalWeightExpression( const TString& variable );
102  void SetBackgroundWeightExpression( const TString& variable );
103 
104  // special case: regression
105  void AddRegressionTree( TTree* tree, Double_t weight = 1.0,
106  Types::ETreeType treetype = Types::kMaxTreeType ) {
107  AddTree( tree, "Regression", weight, "", treetype ); // forwards to AddTree with class name "Regression" and an empty cut
108  }
109 
110  // general
111 
112  // Data input related
113  void SetTree( TTree* tree, const TString& className, Double_t weight ); // deprecated
114  void AddTree( TTree* tree, const TString& className, Double_t weight=1.0,
115  const TCut& cut = "",
116  Types::ETreeType tt = Types::kMaxTreeType );
117  void AddTree( TTree* tree, const TString& className, Double_t weight, const TCut& cut, const TString& treeType );
118 
119  // set input variable
120  void SetInputVariables ( std::vector<TString>* theVariables ); // deprecated
121 
122  void AddVariable ( const TString& expression, const TString& title, const TString& unit,
123  char type='F', Double_t min = 0, Double_t max = 0 );
124  void AddVariable ( const TString& expression, char type='F',
125  Double_t min = 0, Double_t max = 0 );
126 
127  // NEW: add an array of variables (e.g. for image data) with the provided size
128  void AddVariablesArray(const TString &expression, int size, char type = 'F',
129  Double_t min = 0, Double_t max = 0);
130 
131 
132  void AddTarget ( const TString& expression, const TString& title = "", const TString& unit = "",
133  Double_t min = 0, Double_t max = 0 );
134  void AddRegressionTarget( const TString& expression, const TString& title = "", const TString& unit = "",
135  Double_t min = 0, Double_t max = 0 )
136  {
137  AddTarget( expression, title, unit, min, max ); // plain alias: passes every argument straight through to AddTarget
138  }
139  void AddSpectator ( const TString& expression, const TString& title = "", const TString& unit = "",
140  Double_t min = 0, Double_t max = 0 );
141 
142  // set weight for class
143  void SetWeightExpression( const TString& variable, const TString& className = "" );
144 
145  // set cut for class
146  void SetCut( const TString& cut, const TString& className = "" );
147  void SetCut( const TCut& cut, const TString& className = "" );
148  void AddCut( const TString& cut, const TString& className = "" );
149  void AddCut( const TCut& cut, const TString& className = "" );
150 
151 
152  // prepare input tree for training
153  void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt );
154  void PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt );
155 
156  // ... deprecated, kept for backwards compatibility
157  void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 );
158 
159  void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
160  const TString& otherOpt="SplitMode=Random:!V" );
161 
162  // Cross validation
163  void MakeKFoldDataSet(CvSplit & s);
164  void PrepareFoldDataSet(CvSplit & s, UInt_t foldNumber, Types::ETreeType tt = Types::kTraining);
165  void RecombineKFoldDataSet(CvSplit & s, Types::ETreeType tt = Types::kTraining);
166  // read-only access to whatever the private DefaultDataSetInfo() returns
167  const DataSetInfo& GetDefaultDataSetInfo(){ return DefaultDataSetInfo(); }
168 
169  TH2* GetCorrelationMatrix(const TString& className);
170 
171  // Copy method used in VI and CV. DEPRECATED: you can just call Clone, e.g. DataLoader *dl2=(DataLoader *)dl1->Clone("dl2")
172  DataLoader* MakeCopy(TString name);
173  friend void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src);
174  DataInputHandler& DataInput() { return *fDataInputHandler; } // dereferences fDataInputHandler without a null check
175 
176  private:
177 
178 
179  DataSetInfo& DefaultDataSetInfo();
180  void SetInputTreesFromEventAssignTrees();
181 
182  // NOTE(review): the `private:` label below repeats the one above; access is already private at this point.
183  private:
184 
185  // data members
186 
187  // NOTE(review): trailing comments on the members below (e.g. `//->`) look like ROOT dictionary annotations — keep them verbatim; TODO confirm.
188  DataSetManager* fDataSetManager; // DSMTEST
189 
190 
191  DataInputHandler* fDataInputHandler;//->
192 
193  std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; // list of transformations on default DataSet
194 
195  // cd to local directory
196  TString fOptions; // option string given by construction (presently only "V")
197  TString fTransformations; // List of transformations to test
198  Bool_t fVerbose; // verbose mode
199 
200  // flag determining the way training and test data are assigned to DataLoader
201  enum DataAssignType { kUndefined = 0,
202  kAssignTrees,
203  kAssignEvents };
204  DataAssignType fDataAssignType; // flags for data assigning
205  std::vector<TTree*> fTrainAssignTree; // for each class: tmp tree if user wants to assign the events directly
206  std::vector<TTree*> fTestAssignTree; // for each class: tmp tree if user wants to assign the events directly
207 
208  Int_t fATreeType = 0; // type of event (=classIndex)
209  Float_t fATreeWeight = 0.0; // weight of the event
210  std::vector<Float_t> fATreeEvent; // event variables
211 
212  Types::EAnalysisType fAnalysisType; // the training type
213 
214  protected:
215  // NOTE(review): the second ClassDef argument (4) is presumably the ROOT class version — confirm before changing the data members above.
216  ClassDef(DataLoader,4);
217  };
218  void DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src); // namespace-scope declaration of the function befriended by DataLoader; presumably copies src into des — TODO confirm against the implementation
219 } // namespace TMVA
220 
221 #endif
222