18 #ifndef TMVA_DNN_ARCHITECTURES_REFERENCE
19 #define TMVA_DNN_ARCHITECTURES_REFERENCE
48 template<
typename AReal>
// Static pointer to a TRandom instance (one per instantiation of this template).
// Presumably the generator exposed through GetRandomGenerator() -- confirm in the implementation.
52 static TRandom * fgRandomGen;
// Element type: alias for the template parameter AReal.
55 using Scalar_t = AReal;
// Matrix_t and Tensor_t are both aliases of the same type, TMatrixT<AReal>.
56 using Matrix_t = TMatrixT<AReal>;
57 using Tensor_t = TMatrixT<AReal>;
// `output` is the only mutable argument; `input` and `weights` are const references.
// Presumably stores input * weights^T in `output`, as the name suggests -- confirm against the definition.
72 static void MultiplyTranspose(TMatrixT<Scalar_t> &output,
73 const TMatrixT<Scalar_t> &input,
74 const TMatrixT<Scalar_t> &weights);
// `output` is mutable, `biases` is read-only.
// Presumably adds the bias values to each row of `output` -- confirm against the definition.
76 static void AddRowWise(TMatrixT<Scalar_t> &output,
77 const TMatrixT<Scalar_t> &biases);
// Declaration only; the definition is not in this part of the file.
// Mutable arguments: activationGradientsBackward, weightGradients, biasGradients, df.
// Read-only arguments: activationGradients, weights, activationBackward.
// NOTE(review): df is taken by non-const reference, so the implementation may
// overwrite it -- confirm before relying on df after the call.
93 static void Backward(TMatrixT<Scalar_t> & activationGradientsBackward,
94 TMatrixT<Scalar_t> & weightGradients,
95 TMatrixT<Scalar_t> & biasGradients,
96 TMatrixT<Scalar_t> & df,
97 const TMatrixT<Scalar_t> & activationGradients,
98 const TMatrixT<Scalar_t> & weights,
99 const TMatrixT<Scalar_t> & activationBackward);
// Declaration only; returns a reference to a Matrix_t.
// Mutable arguments: state_gradients_backward, input_weight_gradients,
// state_weight_gradients, bias_gradients, df, input_gradient.
// Read-only arguments: state, weights_input, weights_state, input.
// NOTE(review): which matrix the returned reference refers to is not visible
// here -- presumably one of the mutable arguments; confirm in the definition.
101 static Matrix_t & RecurrentLayerBackward(TMatrixT<Scalar_t> & state_gradients_backward,
102 TMatrixT<Scalar_t> & input_weight_gradients,
103 TMatrixT<Scalar_t> & state_weight_gradients,
104 TMatrixT<Scalar_t> & bias_gradients,
105 TMatrixT<Scalar_t> & df,
106 const TMatrixT<Scalar_t> & state,
107 const TMatrixT<Scalar_t> & weights_input,
108 const TMatrixT<Scalar_t> & weights_state,
109 const TMatrixT<Scalar_t> & input,
110 TMatrixT<Scalar_t> & input_gradient);
// A is mutable, B is read-only; beta defaults to 1.0 when omitted.
// Presumably performs A += beta * B -- confirm against the definition.
114 static void ScaleAdd(TMatrixT<Scalar_t> & A,
115 const TMatrixT<Scalar_t> & B,
116 Scalar_t beta = 1.0);
// A is mutable, B is read-only. Presumably copies B into A -- confirm.
118 static void Copy(TMatrixT<Scalar_t> & A,
119 const TMatrixT<Scalar_t> & B);
// Member template: the source matrix type AMatrix_t is a template parameter,
// so B may be a matrix type other than TMatrixT<Scalar_t>.
122 template<
typename AMatrix_t>
123 static void CopyDiffArch(TMatrixT<Scalar_t> & A,
const AMatrix_t & B);
// Overload of ScaleAdd taking vectors of matrices; beta again defaults to 1.0.
127 static void ScaleAdd(std::vector<TMatrixT<Scalar_t>> & A,
128 const std::vector<TMatrixT<Scalar_t>> & B,
129 Scalar_t beta = 1.0);
// Overload of Copy taking vectors of matrices.
131 static void Copy(std::vector<TMatrixT<Scalar_t>> & A,
const std::vector<TMatrixT<Scalar_t>> & B);
// Overload of CopyDiffArch taking a vector of AMatrix_t as the source.
134 template<
typename AMatrix_t>
135 static void CopyDiffArch(std::vector<TMatrixT<Scalar_t> > & A,
const std::vector<AMatrix_t> & B);
// Activation-function family (declarations only). Each function comes as a pair:
//   F(B)              -- takes a single mutable matrix B;
//   FDerivative(B, A) -- takes a mutable matrix B and a read-only matrix A.
// Presumably F transforms B element-wise in place and FDerivative writes the
// derivative evaluated at A into B -- confirm against the definitions.
152 static void Identity(TMatrixT<AReal> & B);
153 static void IdentityDerivative(TMatrixT<AReal> & B,
154 const TMatrixT<AReal> & A);
156 static void Relu(TMatrixT<AReal> & B);
157 static void ReluDerivative(TMatrixT<AReal> & B,
158 const TMatrixT<AReal> & A);
160 static void Sigmoid(TMatrixT<AReal> & B);
161 static void SigmoidDerivative(TMatrixT<AReal> & B,
162 const TMatrixT<AReal> & A);
164 static void Tanh(TMatrixT<AReal> & B);
165 static void TanhDerivative(TMatrixT<AReal> & B,
166 const TMatrixT<AReal> & A);
168 static void SymmetricRelu(TMatrixT<AReal> & B);
169 static void SymmetricReluDerivative(TMatrixT<AReal> & B,
170 const TMatrixT<AReal> & A);
172 static void SoftSign(TMatrixT<AReal> & B);
173 static void SoftSignDerivative(TMatrixT<AReal> & B,
174 const TMatrixT<AReal> & A);
176 static void Gauss(TMatrixT<AReal> & B);
177 static void GaussDerivative(TMatrixT<AReal> & B,
178 const TMatrixT<AReal> & A);
// Loss-function family (declarations only). Each loss comes as a pair:
//   Loss(Y, output, weights)              -- returns a scalar AReal; all inputs read-only;
//   LossGradients(dY, Y, output, weights) -- dY is the only mutable argument.
// Presumably Y holds the targets, output the network output and weights
// per-event weights -- TODO confirm against the definitions.
196 static AReal MeanSquaredError(
const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output,
197 const TMatrixT<AReal> &weights);
198 static void MeanSquaredErrorGradients(TMatrixT<AReal> &dY,
const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output,
199 const TMatrixT<AReal> &weights);
203 static AReal CrossEntropy(
const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output,
const TMatrixT<AReal> &weights);
205 static void CrossEntropyGradients(TMatrixT<AReal> &dY,
const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output,
206 const TMatrixT<AReal> &weights);
210 static AReal SoftmaxCrossEntropy(
const TMatrixT<AReal> &Y,
const TMatrixT<AReal> &output,
211 const TMatrixT<AReal> &weights);
212 static void SoftmaxCrossEntropyGradients(TMatrixT<AReal> &dY,
const TMatrixT<AReal> &Y,
213 const TMatrixT<AReal> &output,
const TMatrixT<AReal> &weights);
// Two-argument overloads: YHat is mutable, the second (unnamed) matrix is read-only.
// This Sigmoid is distinct from the one-argument Sigmoid(B) overload declared for activations.
// Presumably the function is applied to the unnamed input and stored in YHat -- confirm.
229 static void Sigmoid(TMatrixT<AReal> &YHat,
230 const TMatrixT<AReal> & );
231 static void Softmax(TMatrixT<AReal> &YHat,
232 const TMatrixT<AReal> & );
// Regularization helpers (declarations only).
// L1Regularization / L2Regularization take a read-only matrix W and return a scalar AReal.
// The Add*RegularizationGradients functions take a mutable matrix A and a read-only W;
// presumably they add the gradient of the penalty on W to A -- confirm against the definitions.
249 static AReal L1Regularization(
const TMatrixT<AReal> & W);
250 static void AddL1RegularizationGradients(TMatrixT<AReal> & A,
251 const TMatrixT<AReal> & W,
254 static AReal L2Regularization(
const TMatrixT<AReal> & W);
255 static void AddL2RegularizationGradients(TMatrixT<AReal> & A,
256 const TMatrixT<AReal> & W,
// Initializers: each takes a single mutable matrix A.
// Presumably A is filled in place according to the scheme in the function name -- confirm.
272 static void InitializeGauss(TMatrixT<AReal> & A);
274 static void InitializeUniform(TMatrixT<AReal> & A);
276 static void InitializeIdentity(TMatrixT<AReal> & A);
278 static void InitializeZero(TMatrixT<AReal> & A);
280 static void InitializeGlorotUniform(TMatrixT<AReal> & A);
282 static void InitializeGlorotNormal(TMatrixT<AReal> & A);
// Returns a reference to a TRandom; presumably the object behind fgRandomGen -- confirm.
286 static TRandom & GetRandomGenerator();
// Takes the seed as a size_t. Presumably re-seeds the shared generator -- confirm.
289 static void SetRandomSeed(
size_t seed);
// Four-argument overload (declaration only): takes the tensor A plus descriptor and
// workspace pointers and a scalar p.
// NOTE(review): whether p is the keep- or the drop-probability is not visible here -- confirm.
306 static void DropoutForward(Tensor_t &A, TDescriptors *descriptors, TWorkspace *workspace, Scalar_t p);
// Two-argument convenience overload: forwards to the four-argument overload,
// passing null pointers for both the descriptors and the workspace.
307 static void DropoutForward(Matrix_t &A, Scalar_t p)
310 DropoutForward(tA, static_cast<TDescriptors *>(
nullptr), static_cast<TWorkspace *>(
nullptr), p);
// A is mutable, B is read-only; zeroPaddingHeight / zeroPaddingWidth are size_t values.
// Presumably rearranges image patches of B into columns of A (im2col) -- confirm against the definition.
327 static void Im2col(TMatrixT<AReal> &A,
328 const TMatrixT<AReal> &B,
335 size_t zeroPaddingHeight,
336 size_t zeroPaddingWidth);
// Stub: every parameter is unnamed (and therefore unused); the body calls Fatal()
// with the message that this function is not implemented for ref architectures.
338 static void Im2colIndices(std::vector<int> &,
const TMatrixT<AReal> &,
size_t,
size_t,
size_t,
size_t ,
339 size_t ,
size_t ,
size_t ,
size_t ,
size_t ) {
340 Fatal(
"Im2ColIndices",
"This function is not implemented for ref architectures");
// Stub: every parameter is unnamed (and therefore unused); the body calls Fatal()
// with the message that this function is not implemented for ref architectures.
342 static void Im2colFast(TMatrixT<AReal> &,
const TMatrixT<AReal> &,
const std::vector<int> & ) {
343 Fatal(
"Im2ColFast",
"This function is not implemented for ref architectures");
// A is mutable, B is read-only; the filter dimensions and filter count are size_t values.
// Presumably writes a rotated copy of the weights B into A -- confirm against the definition.
348 static void RotateWeights(TMatrixT<AReal> &A,
const TMatrixT<AReal> &B,
size_t filterDepth,
size_t filterHeight,
349 size_t filterWidth,
size_t numFilters);
// `output` is mutable, `biases` is read-only. Presumably adds the biases to `output` -- confirm.
352 static void AddConvBiases(TMatrixT<AReal> &output,
const TMatrixT<AReal> &biases);
// Intentional no-op: the argument is unnamed and the body is empty.
356 static void PrepareInternals(std::vector<TMatrixT<AReal>> &) {}
// Stub: every parameter is unnamed (and therefore unused); the body calls Fatal()
// with the message that this function is not implemented for ref architectures.
359 static void ConvLayerForward(std::vector<TMatrixT<AReal>> & ,
360 std::vector<TMatrixT<AReal>> & ,
361 const std::vector<TMatrixT<AReal>> & ,
362 const TMatrixT<AReal> & ,
const TMatrixT<AReal> & ,
363 const DNN::CNN::TConvParams & , EActivationFunction ,
364 std::vector<TMatrixT<AReal>> & ) {
365 Fatal(
"ConvLayerForward",
"This function is not implemented for ref architectures");
// Stub: every parameter is unnamed (and therefore unused); the body calls Fatal()
// with the message that this function is not implemented for ref architectures.
381 static void ConvLayerBackward(std::vector<TMatrixT<AReal>> &,
382 TMatrixT<AReal> &, TMatrixT<AReal> &,
383 std::vector<TMatrixT<AReal>> &,
384 const std::vector<TMatrixT<AReal>> &,
385 const TMatrixT<AReal> &,
const std::vector<TMatrixT<AReal>> &,
386 size_t ,
size_t ,
size_t ,
size_t ,
size_t,
387 size_t ,
size_t ,
size_t ,
size_t ,
size_t) {
388 Fatal(
"ConvLayerBackward",
"This function is not implemented for ref architectures");
392 #ifdef HAVE_CNN_REFERENCE
// Convolution gradient helpers (declarations only). In each, the first argument is the
// only mutable one; df and the other matrices are read-only and all sizes are size_t.
// activationGradientsBackward is mutable; df and weights are read-only.
395 static void CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activationGradientsBackward,
396 const std::vector<TMatrixT<AReal>> &df,
const TMatrixT<AReal> &weights,
397 size_t batchSize,
size_t inputHeight,
size_t inputWidth,
size_t depth,
398 size_t height,
size_t width,
size_t filterDepth,
size_t filterHeight,
// weightGradients is mutable; df and activationBackward are read-only.
403 static void CalculateConvWeightGradients(TMatrixT<AReal> &weightGradients,
const std::vector<TMatrixT<AReal>> &df,
404 const std::vector<TMatrixT<AReal>> &activationBackward,
size_t batchSize,
405 size_t inputHeight,
size_t inputWidth,
size_t depth,
size_t height,
406 size_t width,
size_t filterDepth,
size_t filterHeight,
size_t filterWidth,
// biasGradients is mutable; df is read-only.
411 static void CalculateConvBiasGradients(TMatrixT<AReal> &biasGradients,
const std::vector<TMatrixT<AReal>> &df,
412 size_t batchSize,
size_t depth,
size_t nLocalViews);
// A and B are both mutable, C is read-only; image, filter and stride sizes are size_t values.
// Presumably A receives the pooled values of C and B the indices of the selected
// elements -- TODO confirm against the definition.
428 static void Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B,
const TMatrixT<AReal> &C,
size_t imgHeight,
429 size_t imgWidth,
size_t fltHeight,
size_t fltWidth,
size_t strideRows,
size_t strideCols);
// activationGradientsBackward is mutable; activationGradients and indexMatrix are read-only.
440 static void MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
441 const TMatrixT<AReal> &activationGradients,
442 const TMatrixT<AReal> &indexMatrix,
// Layout helpers (declarations only). In each, the first argument is mutable
// and the second is read-only.
// Presumably copies the elements of B into A using A's shape -- confirm.
460 static void Reshape(TMatrixT<AReal> &A,
const TMatrixT<AReal> &B);
// Source is a vector of matrices, destination a single matrix.
463 static void Flatten(TMatrixT<AReal> &A,
const std::vector<TMatrixT<AReal>> &B,
size_t size,
size_t nRows,
// Source is a single matrix, destination a vector of matrices.
467 static void Deflatten(std::vector<TMatrixT<AReal>> &A,
const TMatrixT<Scalar_t> &B,
size_t index,
size_t nRows,
// Both arguments are vectors of matrices; `out` is mutable, `in` is read-only.
470 static void Rearrange(std::vector<TMatrixT<AReal>> &out,
const std::vector<TMatrixT<AReal>> &in);
// B is mutable, A is read-only. Presumably stores the column sums of A in B -- confirm.
482 static void SumColumns(TMatrixT<AReal> &B,
const TMatrixT<AReal> &A);
// A is mutable, B is read-only. Presumably the element-wise product A *= B -- confirm.
487 static void Hadamard(TMatrixT<AReal> &A,
const TMatrixT<AReal> &B);
// The following helpers each take a single mutable matrix A (plus a scalar beta
// for ConstAdd / ConstMult); presumably applied element-wise in place -- confirm.
492 static void ConstAdd(TMatrixT<AReal> &A, AReal beta);
497 static void ConstMult(TMatrixT<AReal> &A, AReal beta);
502 static void ReciprocalElementWise(TMatrixT<AReal> &A);
507 static void SquareElementWise(TMatrixT<AReal> &A);
512 static void SqrtElementWise(TMatrixT<AReal> &A);
// Adam helpers: A is mutable; M, V and B are read-only; alpha, eps and beta are scalars.
// NOTE(review): the exact update formulas are not visible here -- see the definitions.
517 static void AdamUpdate(TMatrixT<AReal> & A,
const TMatrixT<AReal> & M,
const TMatrixT<AReal> & V, AReal alpha, AReal eps);
518 static void AdamUpdateFirstMom(TMatrixT<AReal> & A,
const TMatrixT<AReal> & B, AReal beta);
519 static void AdamUpdateSecondMom(TMatrixT<AReal> & A,
const TMatrixT<AReal> & B, AReal beta);
// A is mutable, biases is read-only. Presumably adds the biases to A -- confirm.
529 static void AddBiases(TMatrixT<AReal> &A,
530 const TMatrixT<AReal> &biases);
// All matrix arguments are non-const references; learningRate is a scalar and
// fBatchSize a size_t.
// NOTE(review): which of the matrices are actually modified is not visible here -- confirm.
535 UpdateParams(TMatrixT<AReal> &x, TMatrixT<AReal> &tildeX, TMatrixT<AReal> &y,
536 TMatrixT<AReal> &z, TMatrixT<AReal> &fVBiases,
537 TMatrixT<AReal> &fHBiases, TMatrixT<AReal> &fWeights,
538 TMatrixT<AReal> &VBiasError, TMatrixT<AReal> &HBiasError,
539 AReal learningRate,
size_t fBatchSize);
// Takes a single mutable matrix A. Presumably applies softmax in place -- confirm.
542 static void SoftmaxAE(TMatrixT<AReal> & A);
// Both matrices are non-const references; corruptionLevel is a scalar.
// Presumably writes a corrupted copy of `input` to `corruptedInput` -- confirm.
547 static void CorruptInput(TMatrixT<AReal> & input,
548 TMatrixT<AReal> & corruptedInput,
549 AReal corruptionLevel);
// All three matrices are non-const references.
// Presumably computes compressedInput from input and Weights -- confirm.
552 static void EncodeInput(TMatrixT<AReal> &input,
553 TMatrixT<AReal> &compressedInput,
554 TMatrixT<AReal> &Weights);
// All three matrices are non-const references.
// Presumably computes reconstructedInput from compressedInput and fWeights -- confirm.
558 static void ReconstructInput(TMatrixT<AReal> & compressedInput,
559 TMatrixT<AReal> & reconstructedInput,
560 TMatrixT<AReal> &fWeights);
// Takes input and fWeights by non-const reference.
563 static void ForwardLogReg(TMatrixT<AReal> &input,
565 TMatrixT<AReal> &fWeights);
// All matrix arguments are non-const references.
567 static void UpdateParamsLogReg(TMatrixT<AReal> &input,
568 TMatrixT<AReal> &output,
569 TMatrixT<AReal> &difference,
571 TMatrixT<AReal> &fWeights,
572 TMatrixT<AReal> &fBiases,
// Out-of-class definition of the single-matrix CopyDiffArch member template.
// Builds a temporary TMatrixT<AReal> from B; this requires AMatrix_t to be
// convertible to TMatrixT<AReal>.
580 template <
typename AReal>
581 template <
typename AMatrix_t>
582 void TReference<AReal>::CopyDiffArch(TMatrixT<AReal> &A,
const AMatrix_t &B)
584 TMatrixT<AReal> tmp = B;
// Out-of-class definition of the vector CopyDiffArch member template.
// Calls the single-matrix CopyDiffArch on each pair (A[i], B[i]) for i in [0, A.size()).
// NOTE(review): the loop bound is A.size() and B[i] is indexed without a size
// check, so B must hold at least A.size() elements -- confirm callers guarantee this.
588 template <
typename AReal>
589 template <
typename AMatrix_t>
590 void TReference<AReal>::CopyDiffArch(std::vector<TMatrixT<AReal>> &A,
const std::vector<AMatrix_t> &B)
592 for (
size_t i = 0; i < A.size(); ++i) {
593 CopyDiffArch(A[i], B[i]);