17 #ifndef TMVA_DNN_LAYER
18 #define TMVA_DNN_LAYER
23 #include "Functions.h"
51 template<
typename Architecture_t>
// Shorthand for the scalar, matrix and tensor types supplied by the
// Architecture_t template parameter.
56 using Scalar_t =
typename Architecture_t::Scalar_t;
57 using Matrix_t =
typename Architecture_t::Matrix_t;
58 using Tensor_t =
typename Architecture_t::Tensor_t;
// Handed to Architecture_t::DropoutForward by Forward when dropout is applied.
// NOTE(review): Forward skips dropout when this equals 1.0 — presumably 1.0
// means "no dropout"; confirm against the architecture implementation.
67 Scalar_t fDropoutProbability;
// Wrapped in a Tensor_t in Forward and passed to evaluateDerivative.
72 Matrix_t fDerivatives;
// Passed to addRegularizationGradients in Backward.
73 Matrix_t fWeightGradients;
// Exposed through GetBiasGradients().
74 Matrix_t fBiasGradients;
// Wrapped in a Tensor_t in Backward and passed to Architecture_t::Hadamard.
75 Matrix_t fActivationGradients;
// Activation function selector, passed to evaluate / evaluateDerivative.
77 EActivationFunction fF;
// Constructor declaration: takes the batch size, the activation function f
// and the dropout probability.
// NOTE(review): further parameter lines are not visible in this excerpt; the
// out-of-line definition's initializer list also reads inputWidth and width.
81 TLayer(
size_t BatchSize,
84 EActivationFunction f,
85 Scalar_t dropoutProbability);
// Copy constructor declaration. The definition constructs the matrices from
// the source layer's dimensions and copies weights and biases with
// Architecture_t::Copy.
86 TLayer(
const TLayer &);
// Initializes the weights with method m; the definition initializes the
// biases with EInitialization::kZero regardless of m.
90 void Initialize(EInitialization m);
// Forward pass on `input`. When applyDropout is true (default: false) and the
// dropout probability is not 1.0, DropoutForward is applied to `input` first,
// which is why `input` is taken by non-const reference.
96 void inline Forward(Matrix_t & input,
bool applyDropout =
false);
// Backward pass declaration. Takes the gradient matrix of the previous layer
// (non-const, handed on to Architecture_t::Backward), that layer's
// activations and a weight-decay scalar.
// NOTE(review): at least one parameter line is not visible in this excerpt.
101 void inline Backward(Matrix_t & gradients_backward,
102 const Matrix_t & activations_backward,
104 Scalar_t weightDecay);
// Returns the stored batch size (fBatchSize).
108 size_t GetBatchSize()
const {
return fBatchSize;}
// Returns the stored input width (fInputWidth).
109 size_t GetInputWidth()
const {
return fInputWidth;}
// Returns the stored layer width (fWidth).
110 size_t GetWidth()
const {
return fWidth;}
111 size_t GetDropoutProbability()
const {
return fDropoutProbability;}
// Overwrites the stored dropout probability with p; no range check is done.
113 void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}
// Returns the activation function selector (fF) of this layer.
115 EActivationFunction GetActivationFunction()
const {
return fF;}
// Mutable and read-only access to the output matrix (fOutput) that Forward
// fills.
117 Matrix_t & GetOutput() {
return fOutput;}
118 const Matrix_t & GetOutput()
const {
return fOutput;}
// Mutable and read-only access to the weight matrix (fWeights).
119 Matrix_t & GetWeights() {
return fWeights;}
120 const Matrix_t & GetWeights()
const {
return fWeights;}
// Mutable and read-only access to the bias matrix (fBiases).
121 Matrix_t & GetBiases() {
return fBiases;}
122 const Matrix_t & GetBiases()
const {
return fBiases;}
// Mutable and read-only access to the activation-gradient matrix
// (fActivationGradients) consumed by Backward.
123 Matrix_t & GetActivationGradients() {
return fActivationGradients;}
124 const Matrix_t & GetActivationGradients()
const {
return fActivationGradients;}
// Mutable and read-only access to the bias-gradient matrix (fBiasGradients).
125 Matrix_t & GetBiasGradients() {
return fBiasGradients;}
126 const Matrix_t & GetBiasGradients()
const {
return fBiasGradients;}
// Mutable and read-only access to the weight-gradient matrix
// (fWeightGradients).
127 Matrix_t & GetWeightGradients() {
return fWeightGradients;}
128 const Matrix_t & GetWeightGradients()
const {
return fWeightGradients;}
146 template<
typename Architecture_t>
// Shorthand for the scalar, matrix and tensor types supplied by the
// Architecture_t template parameter.
152 using Scalar_t =
typename Architecture_t::Scalar_t;
153 using Matrix_t =
typename Architecture_t::Matrix_t;
154 using Tensor_t =
typename Architecture_t::Tensor_t;
// Handed to Architecture_t::DropoutForward by Forward when dropout is applied.
163 Scalar_t fDropoutProbability;
// Wrapped in a Tensor_t in Forward and passed to evaluateDerivative.
168 Matrix_t fDerivatives;
// Passed to addRegularizationGradients in Backward.
169 Matrix_t fWeightGradients;
// Exposed through GetBiasGradients().
170 Matrix_t fBiasGradients;
// Passed to Architecture_t::Backward in Backward.
171 Matrix_t fActivationGradients;
// Activation function selector, passed to evaluate / evaluateDerivative.
173 EActivationFunction fF;
177 TSharedLayer(
size_t fBatchSize,
178 TLayer<Architecture_t> & layer);
// Copy constructor declaration. The definition initializes fWeights and
// fBiases from the source layer's members and constructs the remaining
// matrices from the source layer's dimensions.
179 TSharedLayer(
const TSharedLayer & layer);
// Forward pass on `input`. When applyDropout is true (default: false) and the
// dropout probability is not 1.0, DropoutForward is applied to `input` first.
186 void inline Forward(Matrix_t & input,
bool applyDropout =
false);
// Backward pass declaration. Takes the gradient matrix of the previous layer,
// that layer's activations and a weight-decay scalar.
// NOTE(review): at least one parameter line is not visible in this excerpt.
191 void inline Backward(Matrix_t & gradients_backward,
192 const Matrix_t & activations_backward,
194 Scalar_t weightDecay);
// Returns the stored batch size (fBatchSize).
198 size_t GetBatchSize()
const {
return fBatchSize;}
// Returns the stored input width (fInputWidth).
199 size_t GetInputWidth()
const {
return fInputWidth;}
// Returns the stored layer width (fWidth).
200 size_t GetWidth()
const {
return fWidth;}
201 size_t GetDropoutProbability()
const {
return fDropoutProbability;}
// Overwrites the stored dropout probability with p; no range check is done.
203 void SetDropoutProbability(Scalar_t p) {fDropoutProbability = p;}
// Returns the activation function selector (fF) of this layer.
205 EActivationFunction GetActivationFunction()
const {
return fF;}
// Mutable and read-only access to the output matrix (fOutput) that Forward
// fills.
207 Matrix_t & GetOutput() {
return fOutput;}
208 const Matrix_t & GetOutput()
const {
return fOutput;}
// Returns the weight matrix as a non-const reference from a const method.
// NOTE(review): this only compiles if fWeights is a reference or mutable
// member; its declaration is not visible in this excerpt — confirm.
209 Matrix_t & GetWeights()
const {
return fWeights;}
// Mutable and read-only access to the bias matrix (fBiases).
210 Matrix_t & GetBiases() {
return fBiases;}
211 const Matrix_t & GetBiases()
const {
return fBiases;}
// Mutable and read-only access to the activation-gradient matrix
// (fActivationGradients) consumed by Backward.
212 Matrix_t & GetActivationGradients() {
return fActivationGradients;}
213 const Matrix_t & GetActivationGradients()
const {
return fActivationGradients;}
// Mutable and read-only access to the bias-gradient matrix (fBiasGradients).
214 Matrix_t & GetBiasGradients() {
return fBiasGradients;}
215 const Matrix_t & GetBiasGradients()
const {
return fBiasGradients;}
// Mutable and read-only access to the weight-gradient matrix
// (fWeightGradients).
216 Matrix_t & GetWeightGradients() {
return fWeightGradients;}
217 const Matrix_t & GetWeightGradients()
const {
return fWeightGradients;}
// Constructor definition. Stores batch size, input width, width, dropout
// probability and activation function, and constructs the matrices:
//   fWeights / fWeightGradients        with (width, fInputWidth)
//   fBiases / fBiasGradients           with (width, 1)
//   fOutput / fDerivatives / fActivationGradients with (fBatchSize, width)
// NOTE(review): several initializers read the members fBatchSize and
// fInputWidth rather than the parameters; that is only valid if those members
// are declared before the matrices — their declarations are not visible here.
// NOTE(review): some parameter lines and the constructor body are not visible
// in this excerpt.
226 template<
typename Architecture_t>
227 TLayer<Architecture_t>::TLayer(
size_t batchSize,
230 EActivationFunction f,
231 Scalar_t dropoutProbability)
232 : fBatchSize(batchSize), fInputWidth(inputWidth), fWidth(width),
233 fDropoutProbability(dropoutProbability), fWeights(width, fInputWidth),
234 fBiases(width, 1), fOutput(fBatchSize, width), fDerivatives(fBatchSize, width),
235 fWeightGradients(width, fInputWidth), fBiasGradients(width, 1),
236 fActivationGradients(fBatchSize, width), fF(f)
// Copy constructor definition. Copies the scalar settings from `layer`,
// constructs every matrix from the source layer's dimensions, then copies the
// weight and bias contents with Architecture_t::Copy. No Copy call is visible
// for the output, derivative or gradient matrices.
// NOTE(review): the end of the initializer list and the braces are not
// visible in this excerpt.
242 template<
typename Architecture_t>
243 TLayer<Architecture_t>::TLayer(
const TLayer &layer)
244 : fBatchSize(layer.fBatchSize), fInputWidth(layer.fInputWidth),
245 fWidth(layer.fWidth), fDropoutProbability(layer.fDropoutProbability),
246 fWeights(layer.fWidth, layer.fInputWidth), fBiases(layer.fWidth, 1),
247 fOutput(layer.fBatchSize, layer.fWidth),
248 fDerivatives(layer.fBatchSize, layer.fWidth),
249 fWeightGradients(layer.fWidth, layer.fInputWidth),
250 fBiasGradients(layer.fWidth, 1),
251 fActivationGradients(layer.fBatchSize, layer.fWidth),
254 Architecture_t::Copy(fWeights, layer.GetWeights());
255 Architecture_t::Copy(fBiases, layer.GetBiases());
// Initializes fWeights with the requested method m and fBiases with
// EInitialization::kZero (the bias initialization ignores m).
// NOTE(review): the trailing return type and braces are not visible here.
259 template<
typename Architecture_t>
260 auto TLayer<Architecture_t>::Initialize(EInitialization m)
263 initialize<Architecture_t>(fWeights, m);
264 initialize<Architecture_t>(fBiases, EInitialization::kZero);
// Forward pass:
//   1. if applyDropout is set and fDropoutProbability != 1.0, apply
//      DropoutForward to `input`;
//   2. MultiplyTranspose(fOutput, input, fWeights), then AddRowWise adds
//      fBiases to fOutput;
//   3. evaluateDerivative is called on tOutput before evaluate, i.e. the
//      derivative is taken while tOutput still holds the pre-activation
//      values (presumably evaluate works in place — confirm).
// NOTE(review): tOutput / tDerivatives are Tensor_t objects built from the
// member matrices; whether they alias or copy the matrices depends on
// Tensor_t, which is not visible here.
// NOTE(review): part of the signature and the braces are not visible here.
268 template<
typename Architecture_t>
269 auto inline TLayer<Architecture_t>::Forward(Matrix_t & input,
273 if (applyDropout && (fDropoutProbability != 1.0)) {
274 Architecture_t::DropoutForward(input, fDropoutProbability);
276 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
277 Architecture_t::AddRowWise(fOutput, fBiases);
278 Tensor_t tOutput(fOutput);
279 Tensor_t tDerivatives(fDerivatives);
280 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
282 evaluate<Architecture_t>(tOutput, fF);
// Backward pass: wraps the argument matrices and the member matrices
// fActivationGradients / fDerivatives in Tensor_t objects, calls
// Architecture_t::Hadamard(tDeriv, tActGrad), then Architecture_t::Backward
// starting with tGradBw, and finally addRegularizationGradients on
// fWeightGradients.
// NOTE(review): the remaining arguments of Backward and
// addRegularizationGradients, one parameter line and the braces are not
// visible in this excerpt.
286 template<
typename Architecture_t>
287 auto TLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
288 const Matrix_t & activations_backward,
290 Scalar_t weightDecay)
294 Tensor_t tGradBw(gradients_backward);
295 Tensor_t tActBw(activations_backward);
296 Tensor_t tActGrad(fActivationGradients);
297 Tensor_t tDeriv(fDerivatives);
299 Architecture_t::Hadamard( tDeriv, tActGrad);
300 Architecture_t::Backward( tGradBw,
307 addRegularizationGradients<Architecture_t>(fWeightGradients,
// Writes one line to std::cout: the row count of fWeights labelled as the
// width, followed by the activation function printed as its integer enum
// value. std::endl also flushes the stream.
313 template<
typename Architecture_t>
314 void TLayer<Architecture_t>::Print()
const
316 std::cout <<
"Width = " << fWeights.GetNrows();
317 std::cout <<
", Activation Function = ";
318 std::cout << static_cast<int>(fF) << std::endl;
// Constructor definition. Takes the batch size from the argument and the
// input width, width, dropout probability and activation function from
// `layer` via its getters. fWeights and fBiases are initialized from
// layer.GetWeights() / layer.GetBiases(); the remaining matrices are newly
// constructed from this layer's dimensions.
// NOTE(review): whether fWeights / fBiases bind to or copy the source
// matrices depends on their member declarations, which are not visible here.
// NOTE(review): the matrix initializers read the members fBatchSize, fWidth
// and fInputWidth, so those must be declared before the matrices — confirm.
327 template<
typename Architecture_t>
328 TSharedLayer<Architecture_t>::TSharedLayer(
size_t BatchSize,
329 TLayer<Architecture_t> &layer)
330 : fBatchSize(BatchSize),
331 fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
332 fDropoutProbability(layer.GetDropoutProbability()),
333 fWeights(layer.GetWeights()), fBiases(layer.GetBiases()),
334 fOutput(fBatchSize, fWidth), fDerivatives(fBatchSize, fWidth),
335 fWeightGradients(fWidth, fInputWidth), fBiasGradients(fWidth, 1),
336 fActivationGradients(fBatchSize, fWidth), fF(layer.GetActivationFunction())
// Copy constructor definition. Copies the scalar settings from `layer`,
// initializes fWeights / fBiases from the source layer's members, and
// constructs output, derivative and gradient matrices from the dimensions
// only (their contents are not copied here).
// NOTE(review): the end of the initializer list and the braces are not
// visible in this excerpt.
342 template<
typename Architecture_t>
343 TSharedLayer<Architecture_t>::TSharedLayer(
const TSharedLayer &layer)
344 : fBatchSize(layer.fBatchSize),
345 fInputWidth(layer.GetInputWidth()), fWidth(layer.GetWidth()),
346 fDropoutProbability(layer.fDropoutProbability), fWeights(layer.fWeights),
347 fBiases(layer.fBiases), fOutput(layer.fBatchSize, fWidth),
348 fDerivatives(layer.fBatchSize, fWidth), fWeightGradients(fWidth, fInputWidth),
349 fBiasGradients(fWidth, 1), fActivationGradients(layer.fBatchSize, fWidth),
// Forward pass, same sequence as TLayer::Forward:
//   1. optional DropoutForward on `input` (skipped when applyDropout is
//      false or fDropoutProbability == 1.0);
//   2. MultiplyTranspose(fOutput, input, fWeights), then AddRowWise adds
//      fBiases to fOutput;
//   3. evaluateDerivative on tOutput, then evaluate applies fF to tOutput.
// NOTE(review): part of the signature and the braces are not visible here.
355 template<
typename Architecture_t>
356 auto inline TSharedLayer<Architecture_t>::Forward(Matrix_t & input,
360 if (applyDropout && (fDropoutProbability != 1.0)) {
361 Architecture_t::DropoutForward(input, fDropoutProbability);
363 Architecture_t::MultiplyTranspose(fOutput, input, fWeights);
364 Architecture_t::AddRowWise(fOutput, fBiases);
365 Tensor_t tOutput(fOutput);
366 Tensor_t tDerivatives(fDerivatives);
367 evaluateDerivative<Architecture_t>(tDerivatives, fF, tOutput);
368 evaluate<Architecture_t>(tOutput, fF);
// Backward pass: forwards gradients_backward, fActivationGradients and
// activations_backward (among arguments not visible here) to
// Architecture_t::Backward, then calls addRegularizationGradients on
// fWeightGradients.
// NOTE(review): unlike TLayer::Backward, no Tensor_t wrappers and no Hadamard
// call are visible here, and the matrices are passed directly — lines may be
// missing from this excerpt; confirm against the full file.
372 template<
typename Architecture_t>
373 auto inline TSharedLayer<Architecture_t>::Backward(Matrix_t & gradients_backward,
374 const Matrix_t & activations_backward,
376 Scalar_t weightDecay)
379 Architecture_t::Backward(gradients_backward,
383 fActivationGradients,
385 activations_backward);
386 addRegularizationGradients<Architecture_t>(fWeightGradients,
// Writes one line to std::cout: the row count of fWeights labelled as the
// width, followed by the activation function printed as its integer enum
// value. std::endl also flushes the stream.
392 template<
typename Architecture_t>
393 void TSharedLayer<Architecture_t>::Print()
const
395 std::cout <<
"Width = " << fWeights.GetNrows();
396 std::cout <<
", Activation Function = ";
397 std::cout << static_cast<int>(fF) << std::endl;