MethodMLP.h
// @(#)root/tmva $Id$
// Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski,
//         Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne,
//         Jan Therhaag, Jiahang Zhong

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodMLP                                                             *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      ANN Multilayer Perceptron class for the discrimination of signal         *
 *      from background. BFGS implementation based on TMultiLayerPerceptron      *
 *      class from ROOT (http://root.cern.ch).                                   *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Krzysztof Danielowski <danielow@cern.ch>         - IFJ & AGH, Poland      *
 *      Andreas Hoecker       <Andreas.Hocker@cern.ch>   - CERN, Switzerland      *
 *      Matt Jachowski        <jachowski@stanford.edu>   - Stanford University,   *
 *                                                         USA                    *
 *      Kamil Kraszewski      <kalq@cern.ch>             - IFJ & UJ, Poland       *
 *      Maciej Kruk           <mkruk@cern.ch>            - IFJ & AGH, Poland      *
 *      Peter Speckmayer      <peter.speckmayer@cern.ch> - CERN, Switzerland      *
 *      Joerg Stelzer         <stelzer@cern.ch>          - DESY, Germany          *
 *      Jan Therhaag          <Jan.Therhaag@cern.ch>     - U of Bonn, Germany     *
 *      Eckhard v. Toerne     <evt@uni-bonn.de>          - U of Bonn, Germany     *
 *      Jiahang Zhong         <Jiahang.Zhong@cern.ch>    - Academia Sinica, Taipei*
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodMLP
#define ROOT_TMVA_MethodMLP

//////////////////////////////////////////////////////////////////////////
//                                                                      //
// MethodMLP                                                            //
//                                                                      //
// Multilayer Perceptron built off of MethodANNBase                     //
//                                                                      //
//////////////////////////////////////////////////////////////////////////
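
// Example usage (a minimal sketch, not taken from this file: the output file,
// dataset names and the option string below are illustrative, not defaults):
//
//    TFile* outputFile = TFile::Open( "TMVA.root", "RECREATE" );
//    TMVA::Factory factory( "TMVAClassification", outputFile, "AnalysisType=Classification" );
//    TMVA::DataLoader loader( "dataset" );
//    // ... declare variables and register input trees on the loader ...
//    factory.BookMethod( &loader, TMVA::Types::kMLP, "MLP",
//                        "HiddenLayers=N+5:TrainingMethod=BP:LearningRate=0.02" );
//    factory.TrainAllMethods();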

#include <vector>
#include "TString.h"
#include "TTree.h"
#include "TObjArray.h"
#include "TRandom3.h"
#include "TH1F.h"
#include "TMatrixDfwd.h"

#include "TMVA/IFitterTarget.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MethodANNBase.h"
#include "TMVA/TNeuron.h"
#include "TMVA/TActivation.h"
#include "TMVA/ConvergenceTest.h"

// define and immediately undefine: the Minuit-based minimizer code paths
// guarded by this macro are compiled out by default
#define MethodMLP_UseMinuit__
#undef MethodMLP_UseMinuit__

namespace TMVA {

   class MethodMLP : public MethodANNBase, public IFitterTarget, public ConvergenceTest {

   public:

      // standard constructors
      MethodMLP( const TString& jobName,
                 const TString& methodTitle,
                 DataSetInfo& theData,
                 const TString& theOption );

      MethodMLP( DataSetInfo& theData,
                 const TString& theWeightFile );

      virtual ~MethodMLP();

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train();

      // interface for the genetic-algorithm fitter (IFitterTarget)
      Double_t ComputeEstimator ( std::vector<Double_t>& parameters );
      Double_t EstimatorFunction( std::vector<Double_t>& parameters );

      enum ETrainingMethod { kBP=0, kBFGS, kGA };
      enum EBPTrainingMode { kSequential=0, kBatch };
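
      // Both enums are set from the option string at booking time, e.g.
      // "TrainingMethod=BFGS" or "TrainingMethod=BP:BPMode=batch:BatchSize=100"
      // (illustrative values; DeclareOptions() defines the authoritative keys).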

      bool HasInverseHessian() { return fCalculateErrors; }
      Double_t GetMvaValue( Double_t* err=0, Double_t* errUpper=0 );

   protected:

      // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
      void MakeClassSpecific( std::ostream&, const TString& ) const;

      // get help message text
      void GetHelpMessage() const;

   private:

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();

      // general helper functions
      void Train( Int_t nEpochs );
      void Init();
      void InitializeLearningRates(); // although this is only needed by backprop

      // used as a measure of success in all minimization techniques
      Double_t CalculateEstimator( Types::ETreeType treeType = Types::kTraining, Int_t iEpoch = -1 );

      // BFGS functions
      void BFGSMinimize( Int_t nEpochs );
      void SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &Buffer );
      void SteepestDir( TMatrixD &Dir );
      Bool_t GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta );
      void SetDir( TMatrixD &Hessian, TMatrixD &Dir );
      Double_t DerivDir( TMatrixD &Dir );
      Bool_t LineSearch( TMatrixD &Dir, std::vector<Double_t> &Buffer, Double_t* dError=0 ); //zjh
      void ComputeDEDw();
      void SimulateEvent( const Event* ev );
      void SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha );
      Double_t GetError();
      Double_t GetMSEErr( const Event* ev, UInt_t index = 0 ); //zjh
      Double_t GetCEErr( const Event* ev, UInt_t index = 0 ); //zjh
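
      // Sketch of the quasi-Newton scheme behind the functions above (the
      // standard BFGS inverse-Hessian update; notation only, see the .cxx for
      // the actual implementation): with Delta = w_{k+1} - w_k and
      // Gamma = dEdw_{k+1} - dEdw_k, the inverse Hessian estimate H is updated as
      //    H <- H + (1 + Gamma^T H Gamma / (Delta^T Gamma)) * (Delta Delta^T) / (Delta^T Gamma)
      //           - (Delta Gamma^T H + H Gamma Delta^T) / (Delta^T Gamma)
      // and the next search direction is Dir = -H * dEdw, refined by LineSearch().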

      // backpropagation functions
      void BackPropagationMinimize( Int_t nEpochs );
      void TrainOneEpoch();
      void Shuffle( Int_t* index, Int_t n );
      void DecaySynapseWeights( Bool_t lateEpoch );
      void TrainOneEvent( Int_t ievt );
      Double_t GetDesiredOutput( const Event* ev );
      void UpdateNetwork( Double_t desired, Double_t eventWeight=1.0 );
      void UpdateNetwork( const std::vector<Float_t>& desired, Double_t eventWeight=1.0 );
      void CalculateNeuronDeltas();
      void UpdateSynapses();
      void AdjustSynapseWeights();
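
      // Sketch of the per-event (sequential-mode) update implemented above,
      // assuming the usual backpropagation derivation: each synapse weight
      // moves along the negative error gradient,
      //    w_ij <- w_ij - fLearnRate * delta_j * y_i,
      // where y_i is the activation of the input-side neuron and delta_j the
      // error term backpropagated from the output layer; batch mode accumulates
      // the same gradients over fBatchSize events before applying them.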

      // faster backpropagation
      void TrainOneEventFast( Int_t ievt, Float_t*& branchVar, Int_t& type );

      // genetic algorithm functions
      void GeneticMinimize();

#ifdef MethodMLP_UseMinuit__
      // Minuit functions -- compiled out by default (the macro is #undef'ed
      // above) because they rely on a static this-pointer
      void MinuitMinimize();
      static MethodMLP* GetThisPtr();
      static void IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
      void FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t ifl );
#endif

      // general
      bool fUseRegulator;                // use Bayesian regulator during training (zjh)
      bool fCalculateErrors;             // compute inverse hessian matrix at the end of the training
      Double_t fPrior;                   // regulator prior term on the weights (zjh)
      std::vector<Double_t> fPriorDev;   // derivatives of the prior (zjh)
      void GetApproxInvHessian( TMatrixD& InvHessian, bool regulate=true ); // rank-1 approximation, neglect 2nd derivatives (zjh)
      void UpdateRegulators();           // zjh
      void UpdatePriors();               // zjh
      Int_t fUpdateLimit;                // maximum number of regulator updates (zjh)
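
      // A sketch of how the error estimate is meant to be used: with g the
      // gradient of the network response with respect to the synapse weights,
      // the response variance is approximated as sigma^2 ~ g^T * InvHessian * g,
      // which feeds the err/errUpper arguments of GetMvaValue().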

      ETrainingMethod fTrainingMethod; // method of training: BP, BFGS or GA
      TString fTrainMethodS;           // training method option parameter

      Float_t fSamplingFraction; // fraction of events which is sampled for training
      Float_t fSamplingEpoch;    // fraction of epochs where sampling is used
      Float_t fSamplingWeight;   // changing factor for event weights when sampling is turned on
      Bool_t fSamplingTraining;  // the training sample is sampled
      Bool_t fSamplingTesting;   // the testing sample is sampled

      // BFGS variables
      Double_t fLastAlpha; // line search variable
      Double_t fTau;       // line search variable
      Int_t fResetStep;    // reset time (how often we clear the hessian matrix)

      // back propagation variables
      Double_t fLearnRate;     // learning rate for synapse weight adjustments
      Double_t fDecayRate;     // decay rate for the learning rate above
      EBPTrainingMode fBPMode; // backprop learning mode (sequential or batch)
      TString fBpModeS;        // backprop learning mode option string (sequential or batch)
      Int_t fBatchSize;        // batch size, only matters if in batch learning mode
      Int_t fTestRate;         // overtraining test performed every fTestRate-th epoch
      Bool_t fEpochMon;        // create and fill epoch-wise monitoring histograms (makes the output file big!)

      // genetic algorithm variables
      Int_t fGA_nsteps;        // GA settings: number of steps
      Int_t fGA_preCalc;       // GA settings: number of pre-calc steps
      Int_t fGA_SC_steps;      // GA settings: SC_steps
      Int_t fGA_SC_rate;       // GA settings: SC_rate
      Double_t fGA_SC_factor;  // GA settings: SC_factor

      // regression, storage of deviations
      std::vector<std::pair<Float_t,Float_t> >* fDeviationsFromTargets; // deviation from the targets, event weight

      Float_t fWeightRange; // suppress outliers for the estimator calculation

#ifdef MethodMLP_UseMinuit__
      // Minuit variables -- compiled out together with the functions above
      Int_t fNumberOfWeights;   // Minuit: number of weights
      static MethodMLP* fgThis; // Minuit: this pointer
#endif

      // debugging flags
      static const Int_t fgPRINT_ESTIMATOR_INC = 10; // debug flags
      static const Bool_t fgPRINT_SEQ = kFALSE;      // debug flags
      static const Bool_t fgPRINT_BATCH = kFALSE;    // debug flags

      ClassDef(MethodMLP,0); // Multi-layer perceptron implemented specifically for TMVA
   };

} // namespace TMVA

#endif