Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
DenseLayer.h
Go to the documentation of this file.
1 
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TDenseLayer *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Dense Layer Class *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005-2015: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * U. of Bonn, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef TMVA_DNN_DENSELAYER
28 #define TMVA_DNN_DENSELAYER
29 
30 #include "TMatrix.h"
31 
32 #include "TMVA/DNN/GeneralLayer.h"
33 #include "TMVA/DNN/Functions.h"
35 
36 #include <iostream>
37 #include <iomanip>
38 
39 namespace TMVA {
40 namespace DNN {
41 /** \class TDenseLayer
42 
43 Generic layer class.
44 
45 This generic layer class represents a dense layer of a neural network with
46 a given width n and activation function f. Each layer first computes the linear
47 transformation \f$\mathbf{u} = \mathbf{W}\mathbf{x} + \boldsymbol{\theta}\f$, to which the activation function f is then applied.
48 
49 In addition to the weight and bias matrices, each layer allocates memory
50 for its activations and the corresponding input tensor before evaluation of
51 the activation function as well as the gradients of the weights and biases.
52 
53 The layer provides member functions for the forward propagation of
54 activations through the given layer.
55 */
template <typename Architecture_t>
class TDenseLayer : public VGeneralLayer<Architecture_t> {
public:

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:

   Tensor_t fInputActivation; ///< output of GEMM (pre-activation values) and input to the activation function
   Tensor_t fDerivatives;     ///< activation function gradient

   Scalar_t fDropoutProbability; ///< Probability that an input is active.

   EActivationFunction fF; ///< Activation function of the layer.
   ERegularization fReg;   ///< The regularization method.
   Scalar_t fWeightDecay;  ///< The weight decay.

   typename Architecture_t::ActivationDescriptor_t fActivationDesc; ///< the descriptor for the activation function

public:
   /*! Constructor */
   TDenseLayer(size_t BatchSize, size_t InputWidth, size_t Width, EInitialization init, Scalar_t DropoutProbability,
               EActivationFunction f, ERegularization reg, Scalar_t weightDecay);

   /*! Copy the dense layer provided as a pointer */
   TDenseLayer(TDenseLayer<Architecture_t> *layer);

   /*! Copy Constructor */
   TDenseLayer(const TDenseLayer &);

   /*! Destructor */
   ~TDenseLayer();

   /*! Compute activation of the layer for the given input. The input
    *  must be in 3D tensor form with the different matrices corresponding to
    *  different events in the batch. Computes activations as well as
    *  the first partial derivative of the activation function at those
    *  activations. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Compute weight, bias and activation gradients. Uses the precomputed
    *  first partial derivatives of the activation function computed during
    *  forward propagation and modifies them. Must only be called directly
    *  after the corresponding call to Forward(...). */
   void Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward );

   /*! Printing the layer info. */
   void Print() const;

   /*! Writes the information and the weights about the layer in an XML node. */
   virtual void AddWeightsXMLTo(void *parent);

   /*! Read the information and the weights about the layer from XML node. */
   virtual void ReadWeightsFromXML(void *parent);

   /*! Set dropout probabilities */
   virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }

   /*! Getters */
   Scalar_t GetDropoutProbability() const { return fDropoutProbability; }

   /*! Return output of GEMM before computing the activation function. */
   const Tensor_t &GetInputActivation() const { return fInputActivation; }
   Tensor_t &GetInputActivation() { return fInputActivation; }

   EActivationFunction GetActivationFunction() const { return fF; }
   ERegularization GetRegularization() const { return fReg; }
   Scalar_t GetWeightDecay() const { return fWeightDecay; }
};
128 
129 //
130 //
131 // The Dense Layer Class - Implementation
132 //______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::TDenseLayer(size_t batchSize, size_t inputWidth, size_t width, EInitialization init,
                                         Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
                                         Scalar_t weightDecay)
   // NOTE(review): the long VGeneralLayer argument list appears to encode a dense
   // layer as a degenerate layer with unit depth/height dimensions (the repeated 1s)
   // — confirm the exact parameter meanings against VGeneralLayer's constructor.
   : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
                                   batchSize, width, init),
     fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
{
   // should be {1, batchSize, width} but take from output
   fInputActivation = Tensor_t ( this->GetOutput().GetShape() );
   fDerivatives = Tensor_t ( this->GetOutput().GetShape() );

   // create the architecture-specific descriptor for the chosen activation function
   Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
}
147 
148 //______________________________________________________________________________
template <typename Architecture_t>
// Copy a dense layer provided as a pointer: copies the base-layer state, then
// recreates the workspace tensors and the activation descriptor for this copy.
TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer) :
   VGeneralLayer<Architecture_t>(layer),
   fInputActivation( layer->GetInputActivation().GetShape() ),
   fDropoutProbability(layer->GetDropoutProbability()),
   fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
{
   // fDerivatives has the same shape as the output (one derivative per activation)
   fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
   Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
}
159 
160 //______________________________________________________________________________
161 template <typename Architecture_t>
162 TDenseLayer<Architecture_t>::TDenseLayer(const TDenseLayer &layer) :
163  VGeneralLayer<Architecture_t>(layer),
164  fInputActivation( layer->GetInputActivation()),
165  fDropoutProbability(layer.fDropoutProbability),
166  fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
167 {
168  fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
169  Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
170 }
171 
172 //______________________________________________________________________________
template <typename Architecture_t>
TDenseLayer<Architecture_t>::~TDenseLayer()
{
   // release the activation descriptor allocated in the constructors
   Architecture_t::ReleaseDescriptor(fActivationDesc);
}
179 
180 
181 
182 
183 //______________________________________________________________________________
184 template <typename Architecture_t>
185 auto TDenseLayer<Architecture_t>::Forward( Tensor_t &input, bool applyDropout) -> void
186 {
187  if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
188  //
189  Architecture_t::DropoutForward(input, static_cast<TDescriptors *> (nullptr),
190  static_cast<TWorkspace *> (nullptr),
191  this->GetDropoutProbability());
192  }
193  Architecture_t::MultiplyTranspose(this->GetOutput() , input, this->GetWeightsAt(0));
194  Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));
195 
196  //evaluate<Architecture_t>(this->GetOutput(), this->GetActivationFunction());
197  Architecture_t::Copy(this->GetInputActivation(),this->GetOutput());
198 
199  Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
200 }
201 
202 //______________________________________________________________________________
203 template <typename Architecture_t>
204 auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward, const Tensor_t &activations_backward) -> void
205 /// std::vector<Matrix_t> & /*inp1*/, std::vector<Matrix_t> &
206 //// /*inp2*/) -> void
207 {
208 
209  if (this->GetDropoutProbability() != 1.0) {
210  Architecture_t::DropoutBackward(this->GetActivationGradients(),
211  static_cast<TDescriptors *> (nullptr),
212  static_cast<TWorkspace *> (nullptr));
213  }
214 
215  Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
216  this->GetActivationGradients(), this->GetInputActivation(),
217  this->GetActivationFunction(), fActivationDesc);
218 
219  Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
220  fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
221  activations_backward);
222 
223  addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
224  this->GetWeightDecay(), this->GetRegularization());
225 }
226 
227 //______________________________________________________________________________
228 template <typename Architecture_t>
229 void TDenseLayer<Architecture_t>::Print() const
230 {
231  std::cout << " DENSE Layer: \t";
232  std::cout << " ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols(); // input size
233  std::cout << " , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() << " ) "; // layer width
234 
235  std::cout << "\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() << " ," << std::setw(6) << this->GetOutput().GetShape()[0] << " ," << std::setw(6) << this->GetOutput().GetShape()[1] << " ) ";
236 
237  std::vector<std::string> activationNames = { "Identity","Relu","Sigmoid","Tanh","SymmRelu","SoftSign","Gauss" };
238  std::cout << "\t Activation Function = ";
239  std::cout << activationNames[ static_cast<int>(fF) ];
240  if (fDropoutProbability != 1.) std::cout << "\t Dropout prob. = " << fDropoutProbability;
241  std::cout << std::endl;
242 }
243 
244 //______________________________________________________________________________
245 
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::AddWeightsXMLTo(void *parent)
{
   // write layer width, activation function + weight and bias matrices

   auto layerxml = gTools().xmlengine().NewChild(parent, 0, "DenseLayer");

   gTools().xmlengine().NewAttr(layerxml, 0, "Width", gTools().StringFromInt(this->GetWidth()));

   // store the activation function as its integer enumerator value
   int activationFunction = static_cast<int>(this -> GetActivationFunction());
   gTools().xmlengine().NewAttr(layerxml, 0, "ActivationFunction",
                                TString::Itoa(activationFunction, 10));
   // write weights and bias matrix
   this->WriteMatrixToXML(layerxml, "Weights", this -> GetWeightsAt(0));
   this->WriteMatrixToXML(layerxml, "Biases", this -> GetBiasesAt(0));
}
262 
263 //______________________________________________________________________________
template <typename Architecture_t>
void TDenseLayer<Architecture_t>::ReadWeightsFromXML(void *parent)
{
   // Read layer weights and biases from the XML node written by AddWeightsXMLTo.
   // Width and activation function attributes are read by the caller, not here.
   this->ReadMatrixXML(parent,"Weights", this -> GetWeightsAt(0));
   this->ReadMatrixXML(parent,"Biases", this -> GetBiasesAt(0));

}
272 
273 
274 } // namespace DNN
275 } // namespace TMVA
276 
277 #endif