Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodDL.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski, Saurav Shekhar
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodDL *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Deep Neural Network Method *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #ifndef ROOT_TMVA_MethodDL
29 #define ROOT_TMVA_MethodDL
30 
31 //////////////////////////////////////////////////////////////////////////
32 // //
33 // MethodDL //
34 // //
35 // Method class for all Deep Learning Networks //
36 // //
37 //////////////////////////////////////////////////////////////////////////
38 
#include "TString.h"

#include "TMVA/MethodBase.h"
#include "TMVA/Types.h"

//#ifdef R__HAS_TMVACPU
#include "TMVA/DNN/Architectures/Cpu.h"
//#endif

#ifdef R__HAS_TMVAGPU
#include "TMVA/DNN/Architectures/Cuda.h"
#ifdef R__HAS_CUDNN
#include "TMVA/DNN/Architectures/TCudnn.h"
#endif
#endif

#include "TMVA/DNN/Functions.h"
#include "TMVA/DNN/DeepNet.h"

#include <map>
#include <memory>
#include <vector>
61 
62 namespace TMVA {
63 
64 /*! All of the options that can be specified in the training string */
65 struct TTrainingSettings {
66  size_t batchSize;
67  size_t testInterval;
68  size_t convergenceSteps;
69  size_t maxEpochs;
70  DNN::ERegularization regularization;
71  DNN::EOptimizer optimizer;
72  TString optimizerName;
73  Double_t learningRate;
74  Double_t momentum;
75  Double_t weightDecay;
76  std::vector<Double_t> dropoutProbabilities;
77  bool multithreading;
78 };
79 
80 
81 class MethodDL : public MethodBase {
82 
83 private:
84  // Key-Value vector type, contining the values for the training options
85  using KeyValueVector_t = std::vector<std::map<TString, TString>>;
86 // #ifdef R__HAS_TMVAGPU
87 // #ifdef R__HAS_CUDNN
88 // using ArchitectureImpl_t = TMVA::DNN::TCudnn<Float_t>;
89 // #else
90 // using ArchitectureImpl_t = TMVA::DNN::TCuda<Float_t>;
91 // #endif
92 // #else
93 // do not use arch GPU for evaluation. It is too slow for batch size=1
94  using ArchitectureImpl_t = TMVA::DNN::TCpu<Float_t>;
95 // #else
96 // using ArchitectureImpl_t = TMVA::DNN::TReference<Float_t>;
97 // #endif
98 //#endif
99  using DeepNetImpl_t = TMVA::DNN::TDeepNet<ArchitectureImpl_t>;
100  using MatrixImpl_t = typename ArchitectureImpl_t::Matrix_t;
101  using TensorImpl_t = typename ArchitectureImpl_t::Tensor_t;
102  using ScalarImpl_t = typename ArchitectureImpl_t::Scalar_t;
103  using HostBufferImpl_t = typename ArchitectureImpl_t::HostBuffer_t;
104 
105  /*! The option handling methods */
106  void DeclareOptions();
107  void ProcessOptions();
108 
109  void Init();
110 
111  // Function to parse the layout of the input
112  void ParseInputLayout();
113  void ParseBatchLayout();
114 
115  /*! After calling the ProcesOptions(), all of the options are parsed,
116  * so using the parsed options, and given the architecture and the
117  * type of the layers, we build the Deep Network passed as
118  * a reference in the function. */
119  template <typename Architecture_t, typename Layer_t>
120  void CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
121  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets);
122 
123  template <typename Architecture_t, typename Layer_t>
124  void ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
125  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
126 
127  template <typename Architecture_t, typename Layer_t>
128  void ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
129  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
130 
131  template <typename Architecture_t, typename Layer_t>
132  void ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
133  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
134  TString delim);
135 
136  template <typename Architecture_t, typename Layer_t>
137  void ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
138  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
139  TString delim);
140 
141  template <typename Architecture_t, typename Layer_t>
142  void ParseBatchNormLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
143  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString,
144  TString delim);
145 
146 
147  template <typename Architecture_t, typename Layer_t>
148  void ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
149  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
150 
151  template <typename Architecture_t, typename Layer_t>
152  void ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
153  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets, TString layerString, TString delim);
154 
155  /// train of deep neural network using the defined architecture
156  template <typename Architecture_t>
157  void TrainDeepNet();
158 
159  /// perform prediction of the deep neural network
160  /// using batches (called by GetMvaValues)
161  template <typename Architecture_t>
162  std::vector<Double_t> PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress);
163 
164  /// parce the validation string and return the number of event data used for validation
165  UInt_t GetNumValidationSamples();
166 
167  // cudnn implementation needs this format
168  /** Contains the batch size (no. of images in the batch), input depth (no. channels)
169  * and furhter input dimensios of the data (image height, width ...)*/
170  std::vector<size_t> fInputShape;
171 
172  // The size of the batch, i.e. the number of images that are contained in the batch, is either set to be the depth
173  // or the height of the batch
174  size_t fBatchDepth; ///< The depth of the batch used to train the deep net.
175  size_t fBatchHeight; ///< The height of the batch used to train the deep net.
176  size_t fBatchWidth; ///< The width of the batch used to train the deep net.
177 
178  size_t fRandomSeed; ///<The random seed used to initialize the weights and shuffling batches (default is zero)
179 
180  DNN::EInitialization fWeightInitialization; ///< The initialization method
181  DNN::EOutputFunction fOutputFunction; ///< The output function for making the predictions
182  DNN::ELossFunction fLossFunction; ///< The loss function
183 
184  TString fInputLayoutString; ///< The string defining the layout of the input
185  TString fBatchLayoutString; ///< The string defining the layout of the batch
186  TString fLayoutString; ///< The string defining the layout of the deep net
187  TString fErrorStrategy; ///< The string defining the error strategy for training
188  TString fTrainingStrategyString; ///< The string defining the training strategy
189  TString fWeightInitializationString; ///< The string defining the weight initialization method
190  TString fArchitectureString; ///< The string defining the architecure: CPU or GPU
191  TString fNumValidationString; ///< The string defining the number (or percentage) of training data used for validation
192  bool fResume;
193  bool fBuildNet; ///< Flag to control whether to build fNet, the stored network used for the evaluation
194 
195  KeyValueVector_t fSettings; ///< Map for the training strategy
196  std::vector<TTrainingSettings> fTrainingSettings; ///< The vector defining each training strategy
197 
198  TensorImpl_t fXInput; // input tensor used to evaluate fNet
199  HostBufferImpl_t fXInputBuffer; // input hist buffer corresponding to X (needed for GPU implementation)
200  std::unique_ptr<MatrixImpl_t> fYHat; // output prediction matrix of fNet
201  std::unique_ptr<DeepNetImpl_t> fNet;
202 
203 
204  ClassDef(MethodDL, 0);
205 
206 protected:
207  // provide a help message
208  void GetHelpMessage() const;
209 
210  virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress);
211 
212 
213 public:
214  /*! Constructor */
215  MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption);
216 
217  /*! Constructor */
218  MethodDL(DataSetInfo &theData, const TString &theWeightFile);
219 
220  /*! Virtual Destructor */
221  virtual ~MethodDL();
222 
223  /*! Function for parsing the training settings, provided as a string
224  * in a key-value form. */
225  KeyValueVector_t ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim);
226 
227  /*! Check the type of analysis the deep learning network can do */
228  Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets);
229 
230  /*! Methods for training the deep learning network */
231  void Train();
232 
233  Double_t GetMvaValue(Double_t *err = 0, Double_t *errUpper = 0);
234  virtual const std::vector<Float_t>& GetRegressionValues();
235  virtual const std::vector<Float_t>& GetMulticlassValues();
236 
237  /*! Methods for writing and reading weights */
238  using MethodBase::ReadWeightsFromStream;
239  void AddWeightsXMLTo(void *parent) const;
240  void ReadWeightsFromXML(void *wghtnode);
241  void ReadWeightsFromStream(std::istream &);
242 
243  /* Create ranking */
244  const Ranking *CreateRanking();
245 
246  /* Getters */
247  size_t GetInputDepth() const { return fInputShape[1]; } //< no. of channels for an image
248  size_t GetInputHeight() const { return fInputShape[2]; }
249  size_t GetInputWidth() const { return fInputShape[3]; }
250  size_t GetInputDim() const { return fInputShape.size() - 2; }
251  std::vector<size_t> GetInputShape() const { return fInputShape; }
252 
253  size_t GetBatchSize() const { return fInputShape[0]; }
254  size_t GetBatchDepth() const { return fBatchDepth; }
255  size_t GetBatchHeight() const { return fBatchHeight; }
256  size_t GetBatchWidth() const { return fBatchWidth; }
257 
258  const DeepNetImpl_t & GetDeepNet() const { return *fNet; }
259 
260  DNN::EInitialization GetWeightInitialization() const { return fWeightInitialization; }
261  DNN::EOutputFunction GetOutputFunction() const { return fOutputFunction; }
262  DNN::ELossFunction GetLossFunction() const { return fLossFunction; }
263 
264  TString GetInputLayoutString() const { return fInputLayoutString; }
265  TString GetBatchLayoutString() const { return fBatchLayoutString; }
266  TString GetLayoutString() const { return fLayoutString; }
267  TString GetErrorStrategyString() const { return fErrorStrategy; }
268  TString GetTrainingStrategyString() const { return fTrainingStrategyString; }
269  TString GetWeightInitializationString() const { return fWeightInitializationString; }
270  TString GetArchitectureString() const { return fArchitectureString; }
271 
272  const std::vector<TTrainingSettings> &GetTrainingSettings() const { return fTrainingSettings; }
273  std::vector<TTrainingSettings> &GetTrainingSettings() { return fTrainingSettings; }
274  const KeyValueVector_t &GetKeyValueSettings() const { return fSettings; }
275  KeyValueVector_t &GetKeyValueSettings() { return fSettings; }
276 
277  /** Setters */
278  void SetInputDepth (int inputDepth) { fInputShape[1] = inputDepth; }
279  void SetInputHeight(int inputHeight) { fInputShape[2] = inputHeight; }
280  void SetInputWidth (int inputWidth) { fInputShape[3] = inputWidth; }
281  void SetInputShape (std::vector<size_t> inputShape) { fInputShape = std::move(inputShape); }
282 
283  void SetBatchSize (size_t batchSize) { fInputShape[0] = batchSize; }
284  void SetBatchDepth (size_t batchDepth) { fBatchDepth = batchDepth; }
285  void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
286  void SetBatchWidth (size_t batchWidth) { fBatchWidth = batchWidth; }
287 
288  void SetWeightInitialization(DNN::EInitialization weightInitialization)
289  {
290  fWeightInitialization = weightInitialization;
291  }
292  void SetOutputFunction (DNN::EOutputFunction outputFunction) { fOutputFunction = outputFunction; }
293  void SetErrorStrategyString (TString errorStrategy) { fErrorStrategy = errorStrategy; }
294  void SetTrainingStrategyString (TString trainingStrategyString) { fTrainingStrategyString = trainingStrategyString; }
295  void SetWeightInitializationString(TString weightInitializationString)
296  {
297  fWeightInitializationString = weightInitializationString;
298  }
299  void SetArchitectureString (TString architectureString) { fArchitectureString = architectureString; }
300  void SetLayoutString (TString layoutString) { fLayoutString = layoutString; }
301 };
302 
303 } // namespace TMVA
304 
305 #endif