ROOT 6.30.04 Reference Guide
DeepNet.h
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TDeepNet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Deep Neural Network *
12  * *
13  * Authors (alphabetical): *
14  * Akshay Vashistha <akshayvashistha1995@gmail.com> - CERN, Switzerland *
15  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
16  * Saurav Shekhar <sauravshekhar01@gmail.com> - CERN, Switzerland *
17  * *
18  * Copyright (c) 2005-2015: *
19  * CERN, Switzerland *
20  * U. of Victoria, Canada *
21  * MPI-K Heidelberg, Germany *
22  * U. of Bonn, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef TMVA_DNN_DEEPNET
30 #define TMVA_DNN_DEEPNET
31 
32 #include "TString.h"
33 
34 #include "TMVA/DNN/Functions.h"
35 #include "TMVA/DNN/TensorDataLoader.h"
36 
37 #include "TMVA/DNN/GeneralLayer.h"
38 #include "TMVA/DNN/DenseLayer.h"
39 #include "TMVA/DNN/ReshapeLayer.h"
40 #include "TMVA/DNN/BatchNormLayer.h"
41 
42 #include "TMVA/DNN/CNN/ConvLayer.h"
43 #include "TMVA/DNN/CNN/MaxPoolLayer.h"
44 
45 #include "TMVA/DNN/RNN/RNNLayer.h"
46 
47 #ifdef HAVE_DAE
48 #include "TMVA/DNN/DAE/CompressionLayer.h"
49 #include "TMVA/DNN/DAE/CorruptionLayer.h"
50 #include "TMVA/DNN/DAE/ReconstructionLayer.h"
51 #include "TMVA/DNN/DAE/LogisticRegressionLayer.h"
52 #endif
53 
54 #include <vector>
55 #include <cmath>
56 
57 
58 namespace TMVA {
59 namespace DNN {
60 
61  using namespace CNN;
62  using namespace RNN;
63  //using namespace DAE;
64 
65 /** \class TDeepNet
66  Generic Deep Neural Network class.
67  This class encapsulates the information for all types of Deep Neural Networks.
68  \tparam Architecture The Architecture type that holds the
69  architecture-specific data types.
70  */
71 template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
72 class TDeepNet {
73 public:
74 
75  using Tensor_t = typename Architecture_t::Tensor_t;
76  using Matrix_t = typename Architecture_t::Matrix_t;
77  using Scalar_t = typename Architecture_t::Scalar_t;
78 
79 
80 private:
81  bool inline isInteger(Scalar_t x) const { return x == floor(x); }
82  size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);
83 
84 private:
85  std::vector<Layer_t *> fLayers; ///< The layers that make up the DeepNet
86 
87  size_t fBatchSize; ///< Batch size used for training and evaluation.
88  size_t fInputDepth; ///< The depth of the input.
89  size_t fInputHeight; ///< The height of the input.
90  size_t fInputWidth; ///< The width of the input.
91 
92  size_t fBatchDepth; ///< The depth of the batch used for training/testing.
93  size_t fBatchHeight; ///< The height of the batch used for training/testing.
94  size_t fBatchWidth; ///< The width of the batch used for training/testing.
95 
96  bool fIsTraining; ///< Is the network training?
97 
98  ELossFunction fJ; ///< The loss function of the network.
99  EInitialization fI; ///< The initialization method of the network.
100  ERegularization fR; ///< The regularization used for the network.
101  Scalar_t fWeightDecay; ///< The weight decay factor.
102 
103 public:
104  /*! Default Constructor */
105  TDeepNet();
106 
107  /*! Constructor */
108  TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
109  size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
110  ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);
111 
112  /*! Copy-constructor */
113  TDeepNet(const TDeepNet &);
114 
115  /*! Destructor */
116  ~TDeepNet();
117 
118  /*! Function for adding Convolution layer in the Deep Neural Network,
119  * with a given depth, filter height and width, striding in rows and columns,
120  * the zero paddings, as well as the activation function and the dropout
121  * probability. Based on these parameters, it calculates the width and height
122  * of the convolutional layer. */
123  TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
124  size_t strideCols, size_t paddingHeight, size_t paddingWidth,
125  EActivationFunction f, Scalar_t dropoutProbability = 1.0);
126 
127  /*! Function for adding Convolution Layer in the Deep Neural Network,
128  * when the layer is already created. */
129  void AddConvLayer(TConvLayer<Architecture_t> *convLayer);
130 
131  /*! Function for adding Pooling layer in the Deep Neural Network,
132  * with a given filter height and width, striding in rows and columns as
133  * well as the dropout probability. The depth is same as the previous
134  * layer depth. Based on these parameters, it calculates the width and
135  * height of the pooling layer. */
136  TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
137  size_t strideCols, Scalar_t dropoutProbability = 1.0);
138  /*! Function for adding Max Pooling layer in the Deep Neural Network,
139  * when the layer is already created. */
140  void AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer);
141 
142 
143  /*! Function for adding Recurrent Layer in the Deep Neural Network,
144  * with given parameters */
145  TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
146  bool rememberState = false, EActivationFunction f = EActivationFunction::kTanh);
147 
148  /*! Function for adding Vanilla RNN when the layer is already created
149  */
150  void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);
151 
152  /*! Function for adding Dense Connected Layer in the Deep Neural Network,
153  * with a given width, activation function and dropout probability.
154  * Based on the previous layer dimensions, it calculates the input width
155  * of the fully connected layer. */
156  TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);
157 
158  /*! Function for adding Dense Layer in the Deep Neural Network, when
159  * the layer is already created. */
160  void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);
161 
162  /*! Function for adding Reshape Layer in the Deep Neural Network, with a given
163  * height and width. It will take every matrix from the previous layer and
164  * reshape it to a matrix with new dimensions. */
165  TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);
166 
167  /*! Function for adding a Batch Normalization layer with given parameters */
168  TBatchNormLayer<Architecture_t> *AddBatchNormLayer(Scalar_t momentum = -1, Scalar_t epsilon = 0.0001);
169 
170  /*! Function for adding Reshape Layer in the Deep Neural Network, when
171  * the layer is already created. */
172  void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);
173 
174 #ifdef HAVE_DAE /// DAE functions
175  /*! Function for adding Corruption layer in the Deep Neural Network,
176  * with given number of visibleUnits and hiddenUnits. It corrupts input
177  * according to given corruptionLevel and dropoutProbability. */
178  TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
179  Scalar_t dropoutProbability, Scalar_t corruptionLevel);
180 
181  /*! Function for adding Corruption Layer in the Deep Neural Network,
182  * when the layer is already created. */
183  void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);
184 
185  /*! Function for adding Compression layer in the Deep Neural Network,
186  * with given number of visibleUnits and hiddenUnits. It compresses the input units
187  * taking weights and biases from prev layers. */
188  TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
189  Scalar_t dropoutProbability, EActivationFunction f,
190  std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);
191 
192  /*! Function for adding Compression Layer in the Deep Neural Network, when
193  * the layer is already created. */
194  void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);
195 
196  /*! Function for adding Reconstruction layer in the Deep Neural Network,
197  * with given number of visibleUnits and hiddenUnits. It reconstructs the input units
198  * taking weights and biases from prev layers. Same corruptionLevel and dropoutProbability
199  * must be passed as in corruptionLayer. */
200  TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
201  Scalar_t learningRate, EActivationFunction f,
202  std::vector<Matrix_t> weights,
203  std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
204  Scalar_t dropoutProbability);
205 
206  /*! Function for adding Reconstruction Layer in the Deep Neural Network, when
207  * the layer is already created. */
208  void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);
209 
210  /*! Function for adding logisticRegressionLayer in the Deep Neural Network,
211  * with given number of inputUnits and outputUnits. It classifies the outputUnits. */
212  TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
213  size_t testDataBatchSize,
214  Scalar_t learningRate);
215 
216  /*! Function for adding logisticRegressionLayer in the Deep Neural Network, when
217  * the layer is already created. */
218  void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);
219 
220  /* To train the Deep AutoEncoder network with required number of Corruption, Compression and Reconstruction
221  * layers. */
222  void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
223  Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
224  bool applyDropout = false);
225 
226  /* To classify outputLabel in Deep AutoEncoder. Should be used after PreTrain if required.
227  * Currently it uses a Logistic Regression layer, but any other classification layer could be used instead.
228  */
229  void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
230  size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);
231 #endif
232 
233  /*! Function for initialization of the Neural Net. */
234  void Initialize();
235 
236  /*! Function that executes the entire forward pass in the network. */
237  void Forward(Tensor_t &input, bool applyDropout = false);
238 
239  /*! Function that resets some training flags (but not the weights) after looping over all the events */
240  void ResetTraining();
241 
242 
243 
244  /*! Function that executes the entire backward pass in the network. */
245  void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);
246 
247 
248 #ifdef USE_PARALLEL_DEEPNET
249  /*! Function for parallel forward in the vector of deep nets, where the master
250  * net is the net calling this function. There is one batch for one deep net.*/
251  void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
252  std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);
253 
254  /*! Function for parallel backward in the vector of deep nets, where the master
255  * net is the net calling this function and getting the updates from the other nets.
256  * There is one batch for one deep net.*/
257  void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
258  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);
259 
260  /*! Function for parallel backward in the vector of deep nets, where the master
261  * net is the net calling this function and getting the updates from the other nets,
262  * following the momentum strategy. There is one batch for one deep net.*/
263  void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
264  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
265  Scalar_t momentum);
266 
267  /*! Function for parallel backward in the vector of deep nets, where the master
268  * net is the net calling this function and getting the updates from the other nets,
269  * following the Nesterov momentum strategy. There is one batch for one deep net.*/
270  void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
271  std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
272  Scalar_t momentum);
273 
274 #endif // endif use parallel deepnet
275 
276  /*! Function that will update the weights and biases in the layers that
277  * contain weights and biases. */
278  void Update(Scalar_t learningRate);
279 
280  /*! Function for evaluating the loss, based on the activations stored
281  * in the last layer. */
282  Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;
283 
284  /*! Function for evaluating the loss, based on the propagation of the given input. */
285  Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
286  bool inTraining = false, bool includeRegularization = true);
287 
288  /*! Function for computing the regularization term to be added to the loss function */
289  Scalar_t RegularizationTerm() const;
290 
291  /*! Prediction based on activations stored in the last layer. */
292  void Prediction(Matrix_t &predictions, EOutputFunction f) const;
293 
294  /*! Prediction for the given inputs, based on what network learned. */
295  void Prediction(Matrix_t &predictions, Tensor_t & input, EOutputFunction f);
296 
297  /*! Print the Deep Net Info */
298  void Print() const;
299 
300  /*! Get the layer in the vector of layers at position i */
301  inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
302  inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }
303 
304  /* Depth and the output width of the network. */
305  inline size_t GetDepth() const { return fLayers.size(); }
306  inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }
307 
308  /* Return a reference to the layers. */
309  inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
310  inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }
311 
312  /*! Remove all layers from the network. */
313  inline void Clear() { fLayers.clear(); }
314 
315  /*! Getters */
316  inline size_t GetBatchSize() const { return fBatchSize; }
317  inline size_t GetInputDepth() const { return fInputDepth; }
318  inline size_t GetInputHeight() const { return fInputHeight; }
319  inline size_t GetInputWidth() const { return fInputWidth; }
320 
321  inline size_t GetBatchDepth() const { return fBatchDepth; }
322  inline size_t GetBatchHeight() const { return fBatchHeight; }
323  inline size_t GetBatchWidth() const { return fBatchWidth; }
324 
325  inline bool IsTraining() const { return fIsTraining; }
326 
327  inline ELossFunction GetLossFunction() const { return fJ; }
328  inline EInitialization GetInitialization() const { return fI; }
329  inline ERegularization GetRegularization() const { return fR; }
330  inline Scalar_t GetWeightDecay() const { return fWeightDecay; }
331 
332  /*! Setters */
333  // FIXME many of these won't work, as the data structures storing the activations
334  // and gradients are not updated in all the layers, and the parameters stored in the
335  // layers are not updated either
336  inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
337  inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
338  inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
339  inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
340  inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
341  inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
342  inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
343  inline void SetLossFunction(ELossFunction J) { fJ = J; }
344  inline void SetInitialization(EInitialization I) { fI = I; }
345  inline void SetRegularization(ERegularization R) { fR = R; }
346  inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }
347 
348  void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);
349 
350 };
351 
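// Illustrative usage sketch (a minimal, hedged example only, assuming a concrete backend such
// as TMVA::DNN::TCpu<Double_t> from "TMVA/DNN/Architectures/Cpu.h"; the batch-layout arguments
// below are placeholders and must follow the tensor-layout conventions of the chosen
// architecture):
//
//   using namespace TMVA::DNN;
//   using Architecture_t = TCpu<Double_t>;
//   using Net_t          = TDeepNet<Architecture_t>;
//
//   const size_t batchSize = 32, nVars = 100;
//   Net_t net(batchSize, /*InputDepth*/ 1, /*InputHeight*/ 1, /*InputWidth*/ nVars,
//             /*BatchDepth*/ 1, /*BatchHeight*/ batchSize, /*BatchWidth*/ nVars,   // illustrative layout
//             ELossFunction::kCrossEntropy, EInitialization::kGauss,
//             ERegularization::kL2, /*WeightDecay*/ 1e-4, /*isTraining*/ true);
//
//   net.AddDenseLayer(64, EActivationFunction::kRelu);
//   net.AddDenseLayer(1,  EActivationFunction::kIdentity);
//   net.Initialize();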
352 //
353 // Deep Net Class - Implementation
354 //
355 //______________________________________________________________________________
356 template <typename Architecture_t, typename Layer_t>
357 TDeepNet<Architecture_t, Layer_t>::TDeepNet()
358  : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
359  fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
360  fIsTraining(true), fWeightDecay(0.0)
361 {
362  // Nothing to do here.
363 }
364 
365 //______________________________________________________________________________
366 template <typename Architecture_t, typename Layer_t>
367 TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
368  size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
369  EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
370  : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
371  fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
372  fR(R), fWeightDecay(weightDecay)
373 {
374  // Nothing to do here.
375 }
376 
377 //______________________________________________________________________________
378 template <typename Architecture_t, typename Layer_t>
379 TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
380  : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
381  fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
382  fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
383  fWeightDecay(deepNet.fWeightDecay)
384 {
385  // Nothing to do here.
386 }
387 
388 //______________________________________________________________________________
389 template <typename Architecture_t, typename Layer_t>
390 TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
391 {
392  // Release the layers' memory
393  for (auto layer : fLayers)
394  delete layer;
395  fLayers.clear();
396 }
397 
398 //______________________________________________________________________________
399 template <typename Architecture_t, typename Layer_t>
400 auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
401 {
402  Scalar_t dimension = ((imgDim - fltDim + 2 * padding) / stride) + 1;
403  if (!isInteger(dimension) || dimension <= 0) {
404  this->Print();
405  int iLayer = fLayers.size();
406  Fatal("calculateDimension","Not compatible hyper parameters for layer %d - (imageDim, filterDim, padding, stride) %d , %d , %d , %d",
407  iLayer, imgDim, fltDim, padding, stride);
408  // std::cout << " calculateDimension - Not compatible hyper parameters (imgDim, fltDim, padding, stride)"
409  // << imgDim << " , " << fltDim << " , " << padding << " , " << stride<< " resulting dim is " << dimension << std::endl;
410  // std::exit(EXIT_FAILURE);
411  }
412 
413  return (size_t)dimension;
414 }
415 
416 //______________________________________________________________________________
417 template <typename Architecture_t, typename Layer_t>
418 TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
419  size_t filterWidth, size_t strideRows,
420  size_t strideCols, size_t paddingHeight,
421  size_t paddingWidth, EActivationFunction f,
422  Scalar_t dropoutProbability)
423 {
424  // All variables defining a convolutional layer
425  size_t batchSize = this->GetBatchSize();
426  size_t inputDepth;
427  size_t inputHeight;
428  size_t inputWidth;
429  EInitialization init = this->GetInitialization();
430  ERegularization reg = this->GetRegularization();
431  Scalar_t decay = this->GetWeightDecay();
432 
433  if (fLayers.size() == 0) {
434  inputDepth = this->GetInputDepth();
435  inputHeight = this->GetInputHeight();
436  inputWidth = this->GetInputWidth();
437  } else {
438  Layer_t *lastLayer = fLayers.back();
439  inputDepth = lastLayer->GetDepth();
440  inputHeight = lastLayer->GetHeight();
441  inputWidth = lastLayer->GetWidth();
442  }
443 
444 
445 
446  // Create the conv layer
447  TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
448  batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
449  strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);
450 
451  fLayers.push_back(convLayer);
452  return convLayer;
453 }
454 
455 //______________________________________________________________________________
456 template <typename Architecture_t, typename Layer_t>
457 void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
458 {
459  fLayers.push_back(convLayer);
460 }
461 
462 //______________________________________________________________________________
463 template <typename Architecture_t, typename Layer_t>
464 TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
465  size_t strideRows, size_t strideCols,
466  Scalar_t dropoutProbability)
467 {
468  size_t batchSize = this->GetBatchSize();
469  size_t inputDepth;
470  size_t inputHeight;
471  size_t inputWidth;
472 
473  if (fLayers.size() == 0) {
474  inputDepth = this->GetInputDepth();
475  inputHeight = this->GetInputHeight();
476  inputWidth = this->GetInputWidth();
477  } else {
478  Layer_t *lastLayer = fLayers.back();
479  inputDepth = lastLayer->GetDepth();
480  inputHeight = lastLayer->GetHeight();
481  inputWidth = lastLayer->GetWidth();
482  }
483 
484  TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
485  batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth,
486  strideRows, strideCols, dropoutProbability);
487 
488  // Note: only the pointer is stored here; the layer itself is not copied.
489  fLayers.push_back(maxPoolLayer);
490 
491  return maxPoolLayer;
492 }
493 
494 //______________________________________________________________________________
495 template <typename Architecture_t, typename Layer_t>
496 void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
497 {
498  fLayers.push_back(maxPoolLayer);
499 }
500 
501 //______________________________________________________________________________
502 template <typename Architecture_t, typename Layer_t>
503 TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
504  size_t timeSteps,
505  bool rememberState, EActivationFunction f)
506 {
507 
508  // should check if input and time size are consistent
509 
510  //std::cout << "Create RNN " << fLayers.size() << " " << this->GetInputHeight() << " " << this->GetInputWidth() << std::endl;
511  size_t inputHeight, inputWidth, inputDepth;
512  if (fLayers.size() == 0) {
513  inputHeight = this->GetInputHeight();
514  inputWidth = this->GetInputWidth();
515  inputDepth = this->GetInputDepth();
516  } else {
517  Layer_t *lastLayer = fLayers.back();
518  inputHeight = lastLayer->GetHeight();
519  inputWidth = lastLayer->GetWidth();
520  inputDepth = lastLayer->GetDepth();
521  }
522  if (inputSize != inputWidth) {
523  Error("AddBasicRNNLayer","Inconsistent input size with input layout - it should be %zu instead of %zu",inputSize, inputWidth);
524  }
525  if (timeSteps != inputHeight || timeSteps != inputDepth) {
526  Error("AddBasicRNNLayer","Inconsistent time steps with input layout - it should be %zu instead of %zu",timeSteps, inputHeight);
527  }
528 
529  TBasicRNNLayer<Architecture_t> *basicRNNLayer =
530  new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
531  f, fIsTraining, this->GetInitialization());
532  fLayers.push_back(basicRNNLayer);
533  return basicRNNLayer;
534 }
535 
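// Note on the checks above: for a recurrent layer the incoming data are read as a sequence,
// with inputHeight (and inputDepth) giving the number of time steps and inputWidth the number
// of features per step. A hedged example: for sequences of 10 steps with 5 features each, the
// net input height and depth would be 10, the width 5, and the layer could be added as
//
//   net.AddBasicRNNLayer(/*stateSize*/ 16, /*inputSize*/ 5, /*timeSteps*/ 10);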
536 //______________________________________________________________________________
537 template <typename Architecture_t, typename Layer_t>
538 void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
539 {
540  fLayers.push_back(basicRNNLayer);
541 }
542 
543 //DAE
544 #ifdef HAVE_DAE
545 
546 //______________________________________________________________________________
547 template <typename Architecture_t, typename Layer_t>
548 TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
549  size_t hiddenUnits,
550  Scalar_t dropoutProbability,
551  Scalar_t corruptionLevel)
552 {
553  size_t batchSize = this->GetBatchSize();
554 
555  TCorruptionLayer<Architecture_t> *corruptionLayer =
556  new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
557  fLayers.push_back(corruptionLayer);
558  return corruptionLayer;
559 }
560 //______________________________________________________________________________
561 
562 template <typename Architecture_t, typename Layer_t>
563 void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
564 {
565  fLayers.push_back(corruptionLayer);
566 }
567 
568 //______________________________________________________________________________
569 template <typename Architecture_t, typename Layer_t>
570 TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
571  size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
572  std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
573 {
574  size_t batchSize = this->GetBatchSize();
575 
576  TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
577  batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
578  fLayers.push_back(compressionLayer);
579  return compressionLayer;
580 }
581 //______________________________________________________________________________
582 
583 template <typename Architecture_t, typename Layer_t>
584 void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
585 {
586  fLayers.push_back(compressionLayer);
587 }
588 
589 //______________________________________________________________________________
590 template <typename Architecture_t, typename Layer_t>
591 TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
592  size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
593  std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
594 {
595  size_t batchSize = this->GetBatchSize();
596 
597  TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
598  batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
599  fLayers.push_back(reconstructionLayer);
600  return reconstructionLayer;
601 }
602 //______________________________________________________________________________
603 
604 template <typename Architecture_t, typename Layer_t>
605 void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
606  TReconstructionLayer<Architecture_t> *reconstructionLayer)
607 {
608  fLayers.push_back(reconstructionLayer);
609 }
610 
611 //______________________________________________________________________________
612 template <typename Architecture_t, typename Layer_t>
613 TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
614  size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
615 {
616  size_t batchSize = this->GetBatchSize();
617 
618  TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
619  new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
620  fLayers.push_back(logisticRegressionLayer);
621  return logisticRegressionLayer;
622 }
623 //______________________________________________________________________________
624 template <typename Architecture_t, typename Layer_t>
625 void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
626  TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
627 {
628  fLayers.push_back(logisticRegressionLayer);
629 }
630 #endif
631 
632 
633 //______________________________________________________________________________
634 template <typename Architecture_t, typename Layer_t>
635 TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
636  Scalar_t dropoutProbability)
637 {
638  size_t batchSize = this->GetBatchSize();
639  size_t inputWidth;
640  EInitialization init = this->GetInitialization();
641  ERegularization reg = this->GetRegularization();
642  Scalar_t decay = this->GetWeightDecay();
643 
644  if (fLayers.size() == 0) {
645  inputWidth = this->GetInputWidth();
646  } else {
647  Layer_t *lastLayer = fLayers.back();
648  inputWidth = lastLayer->GetWidth();
649  }
650 
651  TDenseLayer<Architecture_t> *denseLayer =
652  new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);
653 
654  fLayers.push_back(denseLayer);
655 
656  return denseLayer;
657 }
658 
659 //______________________________________________________________________________
660 template <typename Architecture_t, typename Layer_t>
661 void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
662 {
663  fLayers.push_back(denseLayer);
664 }
665 
666 //______________________________________________________________________________
667 template <typename Architecture_t, typename Layer_t>
668 TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
669  size_t width, bool flattening)
670 {
671  size_t batchSize = this->GetBatchSize();
672  size_t inputDepth;
673  size_t inputHeight;
674  size_t inputWidth;
675  size_t outputNSlices;
676  size_t outputNRows;
677  size_t outputNCols;
678 
679  if (fLayers.size() == 0) {
680  inputDepth = this->GetInputDepth();
681  inputHeight = this->GetInputHeight();
682  inputWidth = this->GetInputWidth();
683  } else {
684  Layer_t *lastLayer = fLayers.back();
685  inputDepth = lastLayer->GetDepth();
686  inputHeight = lastLayer->GetHeight();
687  inputWidth = lastLayer->GetWidth();
688  }
689 
690  if (flattening) {
691  outputNSlices = 1;
692  outputNRows = this->GetBatchSize();
693  outputNCols = depth * height * width;
694  size_t inputNCols = inputDepth * inputHeight * inputWidth;
695  if (outputNCols != 0 && outputNCols != inputNCols ) {
696  Info("AddReshapeLayer","Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - Force flattening output to be %zu",
697  inputDepth, inputHeight, inputWidth, depth, height, width,inputNCols);
698  }
699  outputNCols = inputNCols;
700  depth = 1;
701  height = 1;
702  width = outputNCols;
703  } else {
704  outputNSlices = this->GetBatchSize();
705  outputNRows = depth;
706  outputNCols = height * width;
707  }
708 
709  TReshapeLayer<Architecture_t> *reshapeLayer =
710  new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
711  outputNSlices, outputNRows, outputNCols, flattening);
712 
713  fLayers.push_back(reshapeLayer);
714 
715  return reshapeLayer;
716 }
717 
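// Note on the flattening branch above: with flattening = true each event is collapsed to a
// single row, so the output tensor holds one slice of batchSize x (depth * height * width);
// with flattening = false the batch is kept as batchSize slices of depth x (height * width).
// This is why a flattening reshape is typically placed between convolutional/pooling layers
// and the first dense layer.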
718 //______________________________________________________________________________
719 template <typename Architecture_t, typename Layer_t>
720 TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum, Scalar_t epsilon)
721 {
722  int axis = -1;
723  size_t batchSize = this->GetBatchSize();
724  size_t inputDepth = 0;
725  size_t inputHeight = 0;
726  size_t inputWidth = 0;
727  // this is the shape of the output tensor (it is columnmajor by default)
728  // and it is normally (depth, hw, bsize) and for dense layers (bsize, w, 1)
729  std::vector<size_t> shape = {1, 1, 1};
730  if (fLayers.size() == 0) {
731  inputDepth = this->GetInputDepth();
732  inputHeight = this->GetInputHeight();
733  inputWidth = this->GetInputWidth();
734  // assume the layout is that of a dense layer
735  shape[0] = batchSize;
736  shape[1] = inputWidth;
737  shape[2] = 1;
738  } else {
739  Layer_t *lastLayer = fLayers.back();
740  inputDepth = lastLayer->GetDepth();
741  inputHeight = lastLayer->GetHeight();
742  inputWidth = lastLayer->GetWidth();
743  shape = lastLayer->GetOutput().GetShape();
744  if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
745  dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
746  axis = 1; // use axis = channel axis for convolutional layer
747  if (shape.size() > 3) {
748  for (size_t i = 3; i < shape.size(); ++i)
749  shape[2] *= shape[i];
750  }
751  // if (axis == 1) {
752  // shape[0] = batchSize;
753  // shape[1] = inputDepth;
754  // shape[2] = inputHeight * inputWidth;
755  // }
756  // for RNN ?
757  }
758  std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
759  << " " << shape[1] << " " << shape[2] << std::endl;
760 
761  auto bnormLayer =
762  new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum, epsilon);
763 
764  fLayers.push_back(bnormLayer);
765 
766  return bnormLayer;
767 }
768 
769 //______________________________________________________________________________
770 template <typename Architecture_t, typename Layer_t>
771 void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
772 {
773  fLayers.push_back(reshapeLayer);
774 }
775 
776 //______________________________________________________________________________
777 template <typename Architecture_t, typename Layer_t>
778 auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
779 {
780  for (size_t i = 0; i < fLayers.size(); i++) {
781  fLayers[i]->Initialize();
782  }
783 }
784 
785 //______________________________________________________________________________
786 template <typename Architecture_t, typename Layer_t>
787 auto TDeepNet<Architecture_t, Layer_t>::ResetTraining() -> void
788 {
789  for (size_t i = 0; i < fLayers.size(); i++) {
790  fLayers[i]->ResetTraining();
791  }
792 }
793 
794 
795 //______________________________________________________________________________
796 template <typename Architecture_t, typename Layer_t>
797 auto TDeepNet<Architecture_t, Layer_t>::Forward( Tensor_t &input, bool applyDropout) -> void
798 {
799  fLayers.front()->Forward(input, applyDropout);
800 
801  for (size_t i = 1; i < fLayers.size(); i++) {
802  fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
803  //std::cout << "forward for layer " << i << std::endl;
804  // fLayers[i]->GetOutput()[0].Print();
805  }
806 }
807 
808 
809 #ifdef HAVE_DAE
810 //_____________________________________________________________________________
811 template <typename Architecture_t, typename Layer_t>
812 auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
813  std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
814  Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
815  EActivationFunction f, bool applyDropout) -> void
816 {
817  std::vector<Matrix_t> inp1;
818  std::vector<Matrix_t> inp2;
819  size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
820  // size_t batchSize = this->GetBatchSize();
821  size_t visibleUnits = (size_t)input[0].GetNrows();
822 
823  AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
824  fLayers.back()->Initialize();
825  fLayers.back()->Forward(input, applyDropout);
826  // fLayers.back()->Print();
827 
828  AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
829  fLayers.back()->GetBiases());
830  fLayers.back()->Initialize();
831  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout); // as we have to pass corrupt input
832 
833  AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
834  fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
835  fLayers.back()->Initialize();
836  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
837  applyDropout); // as we have to pass compressed Input
838  fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
839  input);
840  // three layers are added, now pointer is on third layer
841  size_t weightsSize = fLayers.back()->GetWeights().size();
842  size_t biasesSize = fLayers.back()->GetBiases().size();
843  for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
844  // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
845  for (size_t j = 0; j < weightsSize; j++) {
846  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
847  }
848  for (size_t j = 0; j < biasesSize; j++) {
849  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
850  }
851  fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
852  fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
853  fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
854  fLayers[fLayers.size() - 3]->GetOutput(), input);
855  }
856  fLayers.back()->Print();
857 
858  for (size_t i = 1; i < numOfHiddenLayers; i++) {
859 
860  AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
861  fLayers.back()->Initialize();
862  fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(),
863  applyDropout); // as we have to pass compressed Input
864 
865  AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
866  fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
867  fLayers.back()->Initialize();
868  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
869 
870  AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
871  fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
872  dropoutProbability);
873  fLayers.back()->Initialize();
874  fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(),
875  applyDropout); // as we have to pass compressed Input
876  fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
877  fLayers[fLayers.size() - 5]->GetOutput());
878 
879  // three layers are added, now pointer is on third layer
880  size_t _weightsSize = fLayers.back()->GetWeights().size();
881  size_t _biasesSize = fLayers.back()->GetBiases().size();
882  for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
883  // fLayers[fLayers.size() - 3]->Forward(input,applyDropout);
884  for (size_t j = 0; j < _weightsSize; j++) {
885  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
886  }
887  for (size_t j = 0; j < _biasesSize; j++) {
888  Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
889  }
890  fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
891  fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
892  fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
893  fLayers[fLayers.size() - 3]->GetOutput(),
894  fLayers[fLayers.size() - 5]->GetOutput());
895  }
896  fLayers.back()->Print();
897  }
898 }
899 
900 //______________________________________________________________________________
901 template <typename Architecture_t, typename Layer_t>
902 auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
903  std::vector<Matrix_t> &inputLabel, size_t outputUnits,
904  size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
905 {
906  std::vector<Matrix_t> inp1;
907  std::vector<Matrix_t> inp2;
908  if (fLayers.size() == 0) // only Logistic Regression Layer
909  {
910  size_t inputUnits = input[0].GetNrows();
911 
912  AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
913  fLayers.back()->Initialize();
914  for (size_t i = 0; i < epochs; i++) {
915  fLayers.back()->Backward(inputLabel, inp1, input, inp2);
916  }
917  fLayers.back()->Forward(input, false);
918  fLayers.back()->Print();
919  } else { // if used after any other layer
920  size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
921  AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
922  fLayers.back()->Initialize();
923  for (size_t i = 0; i < epochs; i++) {
924  fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
925  }
926  fLayers.back()->Forward(testInput, false);
927  fLayers.back()->Print();
928  }
929 }
930 #endif
931 
932 //______________________________________________________________________________
933 template <typename Architecture_t, typename Layer_t>
934 auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
935  const Matrix_t &weights) -> void
936 {
937  //Tensor_t inp1;
938  //Tensor_t inp2;
939  // Last layer should be dense layer
940  Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
941  Matrix_t last_output = fLayers.back()->GetOutputAt(0);
942  evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth,
943  last_output, weights);
944 
945  for (size_t i = fLayers.size() - 1; i > 0; i--) {
946  auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
947  auto &activations_backward = fLayers[i - 1]->GetOutput();
948  fLayers[i]->Backward(activation_gradient_backward, activations_backward);
949  }
950 
951  // need a dummy tensor (size = 0) to pass for the activation gradients backward, which
952  // are not computed for the first layer
953  Tensor_t dummy;
954  fLayers[0]->Backward(dummy, input);
955 }
956 
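// Training-step sketch (a hedged outline only, using the methods implemented in this file).
// Given a net that has been built and Initialize()'d, one plain gradient-descent step on a
// batch could look like:
//
//   // inputTensor: Tensor_t with the batch input; yTruth, yWeights: Matrix_t for the targets
//   net.Forward(inputTensor, /*applyDropout*/ true);
//   auto loss = net.Loss(yTruth, yWeights);        // loss from the last layer's activations
//   net.Backward(inputTensor, yTruth, yWeights);   // fills the weight and bias gradients
//   net.Update(/*learningRate*/ 1e-3);             // simple gradient-descent update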
957 #ifdef USE_PARALLEL_DEEPNET
958 
959 //______________________________________________________________________________
960 template <typename Architecture_t, typename Layer_t>
961 auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
962  std::vector<TTensorBatch<Architecture_t>> &batches,
963  bool applyDropout) -> void
964 {
965  size_t depth = this->GetDepth();
966 
967  // The first layer of each deep net
968  for (size_t i = 0; i < nets.size(); i++) {
969  nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
970  }
971 
972  // The i'th layer of each deep net
973  for (size_t i = 1; i < depth; i++) {
974  for (size_t j = 0; j < nets.size(); j++) {
975  nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
976  }
977  }
978 }
979 
980 //______________________________________________________________________________
981 template <typename Architecture_t, typename Layer_t>
982 auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
983  std::vector<TTensorBatch<Architecture_t>> &batches,
984  Scalar_t learningRate) -> void
985 {
986  std::vector<Matrix_t> inp1;
987  std::vector<Matrix_t> inp2;
988  size_t depth = this->GetDepth();
989 
990  // Evaluate the gradients of the last layers in each deep net
991  for (size_t i = 0; i < nets.size(); i++) {
992  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
993  nets[i].GetLossFunction(), batches[i].GetOutput(),
994  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
995  }
996 
997  // Backpropagate the error in i'th layer of each deep net
998  for (size_t i = depth - 1; i > 0; i--) {
999  for (size_t j = 0; j < nets.size(); j++) {
1000  nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
1001  nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
1002  }
1003  }
1004 
1005  std::vector<Matrix_t> dummy;
1006 
1007  // First layer of each deep net
1008  for (size_t i = 0; i < nets.size(); i++) {
1009  nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1010  }
1011 
1012  // Update and copy
1013  for (size_t i = 0; i < nets.size(); i++) {
1014  for (size_t j = 0; j < depth; j++) {
1015  Layer_t *masterLayer = this->GetLayerAt(j);
1016  Layer_t *layer = nets[i].GetLayerAt(j);
1017 
1018  masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
1019  layer->CopyWeights(masterLayer->GetWeights());
1020 
1021  masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
1022  layer->CopyBiases(masterLayer->GetBiases());
1023  }
1024  }
1025 }
1026 
1027 //______________________________________________________________________________
1028 template <typename Architecture_t, typename Layer_t>
1029 auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1030  std::vector<TTensorBatch<Architecture_t>> &batches,
1031  Scalar_t learningRate, Scalar_t momentum) -> void
1032 {
1033  std::vector<Matrix_t> inp1;
1034  std::vector<Matrix_t> inp2;
1035  size_t depth = this->GetDepth();
1036 
1037  // Evaluate the gradients of the last layers in each deep net
1038  for (size_t i = 0; i < nets.size(); i++) {
1039  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1040  nets[i].GetLossFunction(), batches[i].GetOutput(),
1041  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1042  }
1043 
1044  // Backpropagate the error in i'th layer of each deep net
1045  for (size_t i = depth - 1; i > 0; i--) {
1046  Layer_t *masterLayer = this->GetLayerAt(i);
1047 
1048  for (size_t j = 0; j < nets.size(); j++) {
1049  Layer_t *layer = nets[j].GetLayerAt(i);
1050 
1051  layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1052  inp1, inp2);
1053  masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1054  masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1055  }
1056 
1057  masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1058  masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1059  }
1060 
1061  std::vector<Matrix_t> dummy;
1062 
1063  // First layer of each deep net
1064  Layer_t *masterFirstLayer = this->GetLayerAt(0);
1065  for (size_t i = 0; i < nets.size(); i++) {
1066  Layer_t *layer = nets[i].GetLayerAt(0);
1067 
1068  layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1069 
1070  masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1071  masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1072  }
1073 
1074  masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
1075  masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);
1076 
1077  for (size_t i = 0; i < depth; i++) {
1078  Layer_t *masterLayer = this->GetLayerAt(i);
1079  masterLayer->Update(1.0);
1080 
1081  for (size_t j = 0; j < nets.size(); j++) {
1082  Layer_t *layer = nets[j].GetLayerAt(i);
1083 
1084  layer->CopyWeights(masterLayer->GetWeights());
1085  layer->CopyBiases(masterLayer->GetBiases());
1086  }
1087  }
1088 }
1089 
1090 //______________________________________________________________________________
1091 template <typename Architecture_t, typename Layer_t>
1092 auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
1093  std::vector<TTensorBatch<Architecture_t>> &batches,
1094  Scalar_t learningRate, Scalar_t momentum) -> void
1095 {
1096  std::cout << "Parallel Backward Nestorov" << std::endl;
1097  std::vector<Matrix_t> inp1;
1098  std::vector<Matrix_t> inp2;
1099  size_t depth = this->GetDepth();
1100 
1101  // Evaluate the gradients of the last layers in each deep net
1102  for (size_t i = 0; i < nets.size(); i++) {
1103  evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
1104  nets[i].GetLossFunction(), batches[i].GetOutput(),
1105  nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
1106  }
1107 
1108  // Backpropagate the error in i'th layer of each deep net
1109  for (size_t i = depth - 1; i > 0; i--) {
1110  for (size_t j = 0; j < nets.size(); j++) {
1111  Layer_t *layer = nets[j].GetLayerAt(i);
1112 
1113  layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
1114  inp1, inp2);
1115  }
1116  }
1117 
1118  std::vector<Matrix_t> dummy;
1119 
1120  // First layer of each deep net
1121  for (size_t i = 0; i < nets.size(); i++) {
1122  Layer_t *layer = nets[i].GetLayerAt(0);
1123  layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
1124  }
1125 
1126  for (size_t i = 0; i < depth; i++) {
1127  Layer_t *masterLayer = this->GetLayerAt(i);
1128  for (size_t j = 0; j < nets.size(); j++) {
1129  Layer_t *layer = nets[j].GetLayerAt(i);
1130 
1131  layer->CopyWeights(masterLayer->GetWeights());
1132  layer->CopyBiases(masterLayer->GetBiases());
1133 
1134  layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
1135  layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
1136  }
1137 
1138  for (size_t j = 0; j < nets.size(); j++) {
1139  Layer_t *layer = nets[j].GetLayerAt(i);
1140 
1141  masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
1142  masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
1143  }
1144 
1145  masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
1146  masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
1147 
1148  masterLayer->Update(1.0);
1149  }
1150 }
1151 #endif // use parallel deep net
1152 
1153 //______________________________________________________________________________
1154 template <typename Architecture_t, typename Layer_t>
1155 auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
1156 {
1157  for (size_t i = 0; i < fLayers.size(); i++) {
1158  fLayers[i]->Update(learningRate);
1159  }
1160 }
1161 
1162 //______________________________________________________________________________
1163 template <typename Architecture_t, typename Layer_t>
1164 auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
1165  bool includeRegularization) const -> Scalar_t
1166 {
1167  // The output of the last layer is assumed to be a matrix (not a multi-slice tensor)
1168  auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);
1169 
1170  includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
1171  if (includeRegularization) {
1172  loss += RegularizationTerm();
1173  }
1174 
1175  return loss;
1176 }
1177 
1178 //______________________________________________________________________________
1179 template <typename Architecture_t, typename Layer_t>
1180 auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth,
1181  const Matrix_t &weights, bool inTraining, bool includeRegularization)
1182  -> Scalar_t
1183 {
1184  Forward(input, inTraining);
1185  return Loss(groundTruth, weights, includeRegularization);
1186 }
1187 
1188 //______________________________________________________________________________
1189 template <typename Architecture_t, typename Layer_t>
1190 auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
1191 {
1192  Scalar_t reg = 0.0;
1193  for (size_t i = 0; i < fLayers.size(); i++) {
1194  for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
1195  reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
1196  }
1197  }
1198  return this->GetWeightDecay() * reg;
1199 }
1200 
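// Schematically, RegularizationTerm() returns fWeightDecay * sum over all layers and weight
// matrices of R(W), where R is the L1 or L2 penalty selected via ERegularization; with
// ERegularization::kNone the term is skipped entirely in Loss() above.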
1201 
1202 //______________________________________________________________________________
1203 template <typename Architecture_t, typename Layer_t>
1204 auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
1205 {
1206  // Last layer should not be deep (assume output is a matrix)
1207  evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1208 }
1209 
1210 //______________________________________________________________________________
1211 template <typename Architecture_t, typename Layer_t>
1212 auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t & input,
1213  EOutputFunction f) -> void
1214 {
1215  Forward(input, false);
1216  // The output of the last layer is assumed to be a matrix (not a multi-slice tensor)
1217  evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
1218 }
1219 
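// Hedged usage example: after training, per-event outputs for a batch could be obtained with
//
//   net.Prediction(predictions, inputTensor, EOutputFunction::kSigmoid);
//
// where predictions is a Matrix_t of size batch size x output width.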
1220 //______________________________________________________________________________
1221 template <typename Architecture_t, typename Layer_t>
1222 auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
1223 {
1224  std::cout << "DEEP NEURAL NETWORK: Depth = " << this->GetDepth();
1225  std::cout << " Input = ( " << this->GetInputDepth();
1226  std::cout << ", " << this->GetInputHeight();
1227  std::cout << ", " << this->GetInputWidth() << " )";
1228  std::cout << " Batch size = " << this->GetBatchSize();
1229  std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;
1230 
1231  //std::cout << "\t Layers: " << std::endl;
1232 
1233  for (size_t i = 0; i < fLayers.size(); i++) {
1234  std::cout << "\tLayer " << i << "\t";
1235  fLayers[i]->Print();
1236  }
1237 }
1238 
1239 //______________________________________________________________________________
1240 template <typename Architecture_t, typename Layer_t>
1241 void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
1242  const std::vector<Double_t> & probabilities)
1243 {
1244  for (size_t i = 0; i < fLayers.size(); i++) {
1245  if (i < probabilities.size()) {
1246  fLayers[i]->SetDropoutProbability(probabilities[i]);
1247  } else {
1248  fLayers[i]->SetDropoutProbability(1.0);
1249  }
1250  }
1251 }
1252 
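// Note: layers beyond the end of the probabilities vector are reset to 1.0, which (matching the
// default dropoutProbability = 1.0 used by the Add*Layer methods above) corresponds to keeping
// every unit, i.e. no dropout.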
1253 
1254 } // namespace DNN
1255 } // namespace TMVA
1256 
1257 #endif