Net.h
// @(#)root/tmva: $Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

#ifndef TMVA_DNN_NET
#define TMVA_DNN_NET

#include <vector>
#include <iostream>

#include "Layer.h"

namespace TMVA {
namespace DNN {

/** \class TNet

 Generic neural network class.

 This generic neural network class represents a concrete neural
 network through a vector of layers and coordinates the forward
 and backward propagation through the net.

 The net takes as input a batch from the training data given in
 matrix form, with each row corresponding to a certain training
 event.

 On construction, the neural network allocates all the memory
 required for the training of the neural net and keeps it until
 its destruction.

 The Architecture_t type argument holds the architecture-specific
 data types, namely the matrix type Matrix_t and the scalar type
 Scalar_t.

 \tparam Architecture_t The architecture class that provides the
 data types (Matrix_t, Scalar_t) for a given backend.
 \tparam Layer_t The type used for the layers. Can be either
 TLayer<Architecture_t> or TSharedLayer<Architecture_t>.
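
 A minimal usage sketch (illustrative only: `Architecture_t` stands for one of
 the architecture backends, `X`, `Y` and `W` are Matrix_t instances holding the
 input batch, the labels and the event weights, and enum values not used in
 this header are assumptions):

 \code
 TNet<Architecture_t> net(batchSize, nFeatures,
                          ELossFunction::kMeanSquaredError);
 net.AddLayer(64, EActivationFunction::kRelu);     // hidden layer
 net.AddLayer(1, EActivationFunction::kIdentity);  // output layer
 net.Initialize(EInitialization::kGauss);

 net.Forward(X);               // forward the batchSize x nFeatures batch X
 auto loss = net.Loss(Y, W);   // loss on the current output activations
 net.Backward(X, Y, W);        // fill weight and bias gradients
 \endcode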
*/
template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
class TNet {

public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;  ///< Batch size for training and evaluation of the Network.
   size_t fInputWidth; ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t fDummy;       ///< Empty matrix for last step in back propagation.
   ELossFunction fJ;      ///< The loss function of the network.
   ERegularization fR;    ///< The regularization used for the network.
   Scalar_t fWeightDecay; ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   /*! Construct a neural net for the given batch size and input width,
    *  with the given loss function and regularization. */
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction fJ,
        ERegularization fR = ERegularization::kNone,
        Scalar_t fWeightDecay = 0.0);
   /*! Create a clone that uses the same weight and bias matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a layer of the given size to the neural net. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer which shares its weights with another TNet instance. */
   template <typename SharedLayer>
   void AddLayer(SharedLayer & layer);

   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}

   /*! Initialize the weights in the net with the given
    *  initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if the net is
    *  used to store the velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes
    *  all layer activations up to the output layer. */
   inline void Forward(Matrix_t& X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights);

   /*! Evaluate the loss function of the net using the activations
    *  that are currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights, bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the
    *  error function for the resulting activations of the output
    *  layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights, bool applyDropout = false,
                        bool includeRegularization = true);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the neural network and applying the output function
    *  f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the output
    *  function f to the activation of the last layer in the network. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   /*! Return an estimate of the number of floating point operations needed
    *  for one forward and backward pass over a single batch. */
   Scalar_t GetNFlops();

   size_t GetDepth() const                   {return fLayers.size();}
   size_t GetBatchSize() const               {return fBatchSize;}
   Layer_t & GetLayer(size_t i)              {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const  {return fLayers[i];}
   ELossFunction GetLossFunction() const     {return fJ;}
   Matrix_t & GetOutput()                    {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const              {return fInputWidth;}
   size_t GetOutputWidth() const             {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const           {return fWeightDecay;}

   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
   : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
     fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
     fWeightDecay(0.0)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
   : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
     fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
     fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
   : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
     fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
     fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      fLayers[i].GetWeights() = (TMatrixT<Scalar_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Scalar_t>) other.GetLayer(i).GetBiases();
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    size_t inputWidth,
                                    ELossFunction J,
                                    ERegularization R,
                                    Scalar_t weightDecay)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
     fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t batchSize)
   -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(batchSize, fInputWidth,
                                                            fJ, fR);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
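
// A clone created with CreateClone shares the weight and bias matrices of this
// net through TSharedLayer, so it can be used, for example, to evaluate the
// current model with a different batch size. Illustrative sketch (the variable
// names are placeholders, not part of this file):
//
//    TNet<Architecture_t> trainNet(100, nFeatures, ELossFunction::kMeanSquaredError);
//    // ... add layers, initialize, train ...
//    auto evalNet = trainNet.CreateClone(1); // shares weights, batch size 1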

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(), EInitialization::kZero);
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights)
{
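   // The backward pass is seeded with the gradient of the loss with respect to
   // the output-layer activations. Each layer then propagates the gradient to
   // its predecessor; the first layer receives the input batch X, and fDummy
   // absorbs the (unused) gradient with respect to the input.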
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(), fJ, Y,
                                     fLayers.back().GetOutput(), weights);

   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y, const Matrix_t &weights,
                                                bool includeRegularization) const -> Scalar_t
{
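   // Total loss = J(Y, output activations, weights)
   //            + fWeightDecay * sum over layers of R(layer weights),
   // where the regularization term is added only if requested and fR != kNone.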
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}

//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                                                bool applyDropout, bool includeRegularization) -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y, weights, includeRegularization);
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
   -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

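   // An (m x k) times (k x n) matrix product costs m * n * (2k - 1) floating
   // point operations: k multiplications and k - 1 additions per output
   // element. The terms below apply this to the forward and backward matrix
   // products of each layer.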
   for (size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1);   // Matrix multiplication.
      flops += nb * nl;                     // Add bias values.
      flops += 2 * nb * nl;                 // Apply activation function and compute derivative.

      // Backward propagation.
      flops += nb * nl;                     // Hadamard product.
      flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients.
      flops += nl * (nb - 1);               // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Previous layer gradients.
      }
      nlp = nl;
   }
   return flops;
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}

//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}

} // namespace DNN
} // namespace TMVA

#endif