Functions.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/06/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/////////////////////////////////////////////////////////////////////
// Contains function enums for activation and output functions, as //
// well as generic evaluation functions that delegate the call to  //
// the corresponding evaluation kernel.                             //
/////////////////////////////////////////////////////////////////////

#ifndef TMVA_DNN_FUNCTIONS
#define TMVA_DNN_FUNCTIONS

namespace TMVA
{
namespace DNN
{
//______________________________________________________________________________
//
// Enum Definitions
//______________________________________________________________________________

/*! Enum that represents layer activation functions. */
enum class EActivationFunction
{
   kIdentity = 0,
   kRelu     = 1,
   kSigmoid  = 2,
   kTanh     = 3,
   kSymmRelu = 4,
   kSoftSign = 5,
   kGauss    = 6
};
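
/* Reference note (hedged; the authoritative behavior is the backend kernel
 * behind each name): the usual elementwise definitions are identity
 * f(x) = x, ReLU f(x) = max(0, x), sigmoid f(x) = 1 / (1 + exp(-x)), tanh,
 * soft sign f(x) = x / (1 + |x|), and Gauss f(x) = exp(-x^2). kSymmRelu
 * denotes the symmetric (absolute-value) variant as implemented by the
 * backend kernels. */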

/*! Enum that represents output functions. */
enum class EOutputFunction
{
   kIdentity = 'I',
   kSigmoid  = 'S',
   kSoftmax  = 'M'
};

/*! Enum that represents objective functions for the net, i.e. functions
 *  that take the output from the last layer in the net together with the
 *  truth and return the objective function value that is to be minimized
 *  in the training process. */
enum class ELossFunction
{
   kCrossEntropy        = 'C',
   kMeanSquaredError    = 'R',
   kSoftmaxCrossEntropy = 'S'
};
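
/* Reference note (hedged; the per-architecture kernels define the exact,
 * event-weighted behavior). For truth y and prediction \f$\hat{y}\f$ the
 * conventional per-event terms are
 * \f[ L_{CE} = -\sum_i \big( y_i \log \hat{y}_i + (1 - y_i) \log(1 - \hat{y}_i) \big), \qquad
 *     L_{MSE} = \frac{1}{n} \sum_i (y_i - \hat{y}_i)^2, \f]
 * while kSoftmaxCrossEntropy first maps the raw outputs through a softmax,
 * which is the numerically preferred choice for multi-class targets. */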

/*! Enum representing the regularization type applied for a given layer. */
enum class ERegularization
{
   kNone = '0',
   kL1   = '1',
   kL2   = '2'
};

/*! Enum representing the initialization method used for this layer. */
enum class EInitialization {
   kGauss         = 'G',
   kUniform       = 'U',
   kIdentity      = 'I',
   kZero          = 'Z',
   kGlorotNormal  = 'X',
   kGlorotUniform = 'F'
};

/// Enum representing the optimizer used for training.
enum class EOptimizer {
   kSGD      = 0,
   kAdam     = 1,
   kAdagrad  = 2,
   kRMSProp  = 3,
   kAdadelta = 4
};

//______________________________________________________________________________
//
// Activation Functions
//______________________________________________________________________________

/*! Apply the given activation function to each value in the given
 *  tensor A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Tensor_t &A,
                     EActivationFunction f)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : break;
   case EActivationFunction::kRelu     : Architecture_t::Relu(A);          break;
   case EActivationFunction::kSigmoid  : Architecture_t::Sigmoid(A);       break;
   case EActivationFunction::kTanh     : Architecture_t::Tanh(A);          break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricRelu(A); break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSign(A);      break;
   case EActivationFunction::kGauss    : Architecture_t::Gauss(A);         break;
   }
}
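
/*! Illustrative sketch (not part of the original interface): the forward pass
 *  through a layer's nonlinearity is a single in-place call on the tensor of
 *  linear activations. Any backend that provides Tensor_t and the kernels
 *  dispatched above (e.g. the CPU backend TCpu<double>, an assumption) can
 *  instantiate it. */
template <typename Architecture_t>
inline void exampleForwardActivation(typename Architecture_t::Tensor_t &output,
                                     EActivationFunction f)
{
   // Mutates 'output' in place; kIdentity is a no-op by construction.
   evaluate<Architecture_t>(output, f);
}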

/*! Compute the first partial derivative of the activation function for
 *  the values given in tensor A and write the results into B. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void evaluateDerivative(typename Architecture_t::Tensor_t & B,
                               EActivationFunction f,
                               const typename Architecture_t::Tensor_t & A)
{
   switch(f)
   {
   case EActivationFunction::kIdentity : Architecture_t::IdentityDerivative(B, A);      break;
   case EActivationFunction::kRelu     : Architecture_t::ReluDerivative(B, A);          break;
   case EActivationFunction::kSigmoid  : Architecture_t::SigmoidDerivative(B, A);       break;
   case EActivationFunction::kTanh     : Architecture_t::TanhDerivative(B, A);          break;
   case EActivationFunction::kSymmRelu : Architecture_t::SymmetricReluDerivative(B, A); break;
   case EActivationFunction::kSoftSign : Architecture_t::SoftSignDerivative(B, A);      break;
   case EActivationFunction::kGauss    : Architecture_t::GaussDerivative(B, A);         break;
   }
}
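
/*! Illustrative backward step (a sketch; Architecture_t::Hadamard, an
 *  element-wise product kernel, is an assumed part of the backend interface):
 *  given the activations A cached from the forward pass and the gradient
 *  flowing back from the next layer, form dZ = f'(A) * gradient. */
template <typename Architecture_t>
inline void exampleActivationBackward(typename Architecture_t::Tensor_t &dZ,
                                      const typename Architecture_t::Tensor_t &A,
                                      const typename Architecture_t::Tensor_t &gradient,
                                      EActivationFunction f)
{
   evaluateDerivative<Architecture_t>(dZ, f, A); // dZ <- f'(A)
   Architecture_t::Hadamard(dZ, gradient);       // dZ <- dZ . gradient, element-wise (assumed kernel)
}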
//______________________________________________________________________________
//
// Output Functions
//______________________________________________________________________________

/*! Apply the given output function to each value in the matrix X and
 *  write the result into the matrix A. */
template<typename Architecture_t>
inline void evaluate(typename Architecture_t::Matrix_t &A,
                     EOutputFunction f,
                     const typename Architecture_t::Matrix_t &X)
{
   switch(f)
   {
   case EOutputFunction::kIdentity : Architecture_t::Copy(A, X);    break;
   case EOutputFunction::kSigmoid  : Architecture_t::Sigmoid(A, X); break;
   case EOutputFunction::kSoftmax  : Architecture_t::Softmax(A, X); break;
   }
}
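
/*! Illustrative sketch: turn the raw last-layer output X into predictions A,
 *  here class probabilities via softmax. A must be allocated by the caller
 *  with the same shape as X. */
template <typename Architecture_t>
inline void examplePredictProbabilities(typename Architecture_t::Matrix_t &A,
                                        const typename Architecture_t::Matrix_t &X)
{
   // Each row of A (one event, per the convention of these kernels) becomes
   // a normalized probability distribution over the output classes.
   evaluate<Architecture_t>(A, EOutputFunction::kSoftmax, X);
}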

//______________________________________________________________________________
//
// Loss Functions
//______________________________________________________________________________

/*! Compute the value of the objective function f for the given activations
 *  of the output layer and the truth Y. */
template <typename Architecture_t>
inline auto evaluate(ELossFunction f, const typename Architecture_t::Matrix_t &Y,
                     const typename Architecture_t::Matrix_t &output,
                     const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:        return Architecture_t::CrossEntropy(Y, output, weights);
   case ELossFunction::kMeanSquaredError:    return Architecture_t::MeanSquaredError(Y, output, weights);
   case ELossFunction::kSoftmaxCrossEntropy: return Architecture_t::SoftmaxCrossEntropy(Y, output, weights);
   }
   return 0.0;
}
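
/*! Illustrative sketch: evaluate the data term of the objective for one batch.
 *  'weights' holds per-event weights; an all-ones matrix recovers the
 *  unweighted loss, assuming the backend kernels apply them multiplicatively. */
template <typename Architecture_t>
inline auto exampleBatchLoss(const typename Architecture_t::Matrix_t &Y,
                             const typename Architecture_t::Matrix_t &output,
                             const typename Architecture_t::Matrix_t &weights)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   return evaluate<Architecture_t>(ELossFunction::kCrossEntropy, Y, output, weights);
}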

/*! Compute the gradient of the given loss function f for the given activations
 *  output of the output layer and truth Y and write the results into dY. */
//______________________________________________________________________________
template <typename Architecture_t>
inline void evaluateGradients(typename Architecture_t::Matrix_t &dY, ELossFunction f,
                              const typename Architecture_t::Matrix_t &Y,
                              const typename Architecture_t::Matrix_t &output,
                              const typename Architecture_t::Matrix_t &weights)
{
   switch(f)
   {
   case ELossFunction::kCrossEntropy:        Architecture_t::CrossEntropyGradients(dY, Y, output, weights);        break;
   case ELossFunction::kMeanSquaredError:    Architecture_t::MeanSquaredErrorGradients(dY, Y, output, weights);    break;
   case ELossFunction::kSoftmaxCrossEntropy: Architecture_t::SoftmaxCrossEntropyGradients(dY, Y, output, weights); break;
   }
}
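
/*! Illustrative sketch: the seed of backpropagation. Fills dY with the
 *  gradient of the objective with respect to the network output; earlier
 *  layers then propagate dY backwards through their own transfer functions. */
template <typename Architecture_t>
inline void exampleSeedBackprop(typename Architecture_t::Matrix_t &dY,
                                const typename Architecture_t::Matrix_t &Y,
                                const typename Architecture_t::Matrix_t &output,
                                const typename Architecture_t::Matrix_t &weights)
{
   evaluateGradients<Architecture_t>(dY, ELossFunction::kMeanSquaredError, Y, output, weights);
}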

//______________________________________________________________________________
//
// Regularization
//______________________________________________________________________________

/*! Evaluate the regularization functional for a given weight matrix. */
template<typename Architecture_t>
inline auto regularization(const typename Architecture_t::Matrix_t &A,
                           ERegularization R)
   -> decltype(Architecture_t::L1Regularization(A))
{
   switch(R)
   {
   case ERegularization::kNone : return 0.0;
   case ERegularization::kL1   : return Architecture_t::L1Regularization(A);
   case ERegularization::kL2   : return Architecture_t::L2Regularization(A);
   }
   return 0.0;
}
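
/*! Illustrative sketch: a regularized objective composes the data term with a
 *  scaled penalty on the weights, loss + weightDecay * R(W), where weightDecay
 *  is a training hyperparameter supplied by the caller. */
template <typename Architecture_t>
inline auto exampleRegularizedLoss(const typename Architecture_t::Matrix_t &Y,
                                   const typename Architecture_t::Matrix_t &output,
                                   const typename Architecture_t::Matrix_t &weights,
                                   const typename Architecture_t::Matrix_t &W,
                                   typename Architecture_t::Scalar_t weightDecay)
   -> decltype(Architecture_t::CrossEntropy(Y, output, weights))
{
   auto dataTerm = evaluate<Architecture_t>(ELossFunction::kCrossEntropy, Y, output, weights);
   return dataTerm + weightDecay * regularization<Architecture_t>(W, ERegularization::kL2);
}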

/*! Add the regularization gradient corresponding to weight matrix W, to
 *  the matrix A. */
//______________________________________________________________________________
template<typename Architecture_t>
inline void addRegularizationGradients(typename Architecture_t::Matrix_t &A,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay,
                                       ERegularization R)
{
   switch(R)
   {
   case ERegularization::kNone :
      break;
   case ERegularization::kL1 :
      Architecture_t::AddL1RegularizationGradients(A, W, weightDecay);
      break;
   case ERegularization::kL2 :
      Architecture_t::AddL2RegularizationGradients(A, W, weightDecay);
      break;
   }
}
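
/*! Illustrative sketch: after backpropagation has filled dW with the data-term
 *  gradient for weight matrix W, fold in the penalty gradient so a single
 *  optimizer step sees the full regularized objective. The exact scaling
 *  convention (e.g. a factor 2 for L2) is defined by the backend kernel. */
template <typename Architecture_t>
inline void exampleRegularizedGradient(typename Architecture_t::Matrix_t &dW,
                                       const typename Architecture_t::Matrix_t &W,
                                       typename Architecture_t::Scalar_t weightDecay)
{
   addRegularizationGradients<Architecture_t>(dW, W, weightDecay, ERegularization::kL2);
}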

//______________________________________________________________________________
//
// Initialization
//______________________________________________________________________________

/*! Initialize the matrix A using the given initialization method. */
template<typename Architecture_t>
inline void initialize(typename Architecture_t::Matrix_t & A,
                       EInitialization m)
{
   switch(m) {
   case EInitialization::kGauss         : Architecture_t::InitializeGauss(A);         break;
   case EInitialization::kUniform       : Architecture_t::InitializeUniform(A);       break;
   case EInitialization::kIdentity      : Architecture_t::InitializeIdentity(A);      break;
   case EInitialization::kZero          : Architecture_t::InitializeZero(A);          break;
   case EInitialization::kGlorotNormal  : Architecture_t::InitializeGlorotNormal(A);  break;
   case EInitialization::kGlorotUniform : Architecture_t::InitializeGlorotUniform(A); break;
   }
}
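
/*! Illustrative sketch: prepare a freshly allocated weight matrix with the
 *  Glorot/Xavier uniform scheme before the first training step. */
template <typename Architecture_t>
inline void exampleInitWeights(typename Architecture_t::Matrix_t &W)
{
   initialize<Architecture_t>(W, EInitialization::kGlorotUniform);
}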

} // namespace DNN
} // namespace TMVA

#endif