#ifndef TMVA_DNN_DEEPNET
#define TMVA_DNN_DEEPNET

#include "TMVA/DNN/DAE/CompressionLayer.h"
#include "TMVA/DNN/DAE/CorruptionLayer.h"
#include "TMVA/DNN/DAE/ReconstructionLayer.h"
#include "TMVA/DNN/DAE/LogisticRegressionLayer.h"

/** Generic deep neural network class: a container of layers that are trained
    together. Architecture_t selects the computational backend, Layer_t the
    common layer interface. */
template <typename Architecture_t, typename Layer_t = VGeneralLayer<Architecture_t>>
class TDeepNet {
public:
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Scalar_t = typename Architecture_t::Scalar_t;

private:
   bool inline isInteger(Scalar_t x) const { return x == floor(x); }
   size_t calculateDimension(int imgDim, int fltDim, int padding, int stride);

   std::vector<Layer_t *> fLayers; ///< The layers consisting the DeepNet

   size_t fBatchSize;   ///< Batch size used for training and evaluation
   size_t fInputDepth;  ///< The depth of the input
   size_t fInputHeight; ///< The height of the input
   size_t fInputWidth;  ///< The width of the input

   size_t fBatchDepth;  ///< The depth of the batch used for training/testing
   size_t fBatchHeight; ///< The height of the batch used for training/testing
   size_t fBatchWidth;  ///< The width of the batch used for training/testing

   bool fIsTraining; ///< Is the network training?

   ELossFunction fJ;      ///< The loss function of the network
   EInitialization fI;    ///< The initialization method of the network
   ERegularization fR;    ///< The regularization used for the network
   Scalar_t fWeightDecay; ///< The weight decay factor

public:
   /*! Default constructor. */
   TDeepNet();

   /*! Constructor. */
   TDeepNet(size_t BatchSize, size_t InputDepth, size_t InputHeight, size_t InputWidth, size_t BatchDepth,
            size_t BatchHeight, size_t BatchWidth, ELossFunction fJ, EInitialization fI = EInitialization::kZero,
            ERegularization fR = ERegularization::kNone, Scalar_t fWeightDecay = 0.0, bool isTraining = false);

   /*! Copy-constructor. */
   TDeepNet(const TDeepNet &);
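
   /* Illustrative usage (a sketch, assuming the CPU backend TCpu<Double_t> from
      TMVA/DNN/Architectures/Cpu.h; all values are hypothetical):

         TDeepNet<TCpu<Double_t>> net(32, 1, 28, 28, 32, 1, 28 * 28,
                                      ELossFunction::kCrossEntropy,
                                      EInitialization::kGauss);
         net.AddReshapeLayer(1, 28, 28, false);                 // rows -> images
         net.AddConvLayer(8, 3, 3, 1, 1, 1, 1, EActivationFunction::kRelu);
         net.AddMaxPoolLayer(2, 2, 2, 2);
         net.AddReshapeLayer(1, 1, 8 * 14 * 14, true);          // flatten
         net.AddDenseLayer(10, EActivationFunction::kIdentity);
         net.Initialize();
   */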

   /*! Adds a convolutional layer with the given depth, filter size, stride and
       padding; the layer's output width and height are computed from these. */
   TConvLayer<Architecture_t> *AddConvLayer(size_t depth, size_t filterHeight, size_t filterWidth, size_t strideRows,
                                            size_t strideCols, size_t paddingHeight, size_t paddingWidth,
                                            EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Adds an already-created convolutional layer to the network. */
   void AddConvLayer(TConvLayer<Architecture_t> *convLayer);

   /*! Adds a max-pooling layer with the given frame size and strides. */
   TMaxPoolLayer<Architecture_t> *AddMaxPoolLayer(size_t frameHeight, size_t frameWidth, size_t strideRows,
                                                  size_t strideCols, Scalar_t dropoutProbability = 1.0);

   /*! Adds an already-created max-pooling layer to the network. */
   void AddMaxPoolLayer(CNN::TMaxPoolLayer<Architecture_t> *maxPoolLayer);

   /*! Adds a vanilla recurrent layer with the given state size, input size and
       number of time steps. */
   TBasicRNNLayer<Architecture_t> *AddBasicRNNLayer(size_t stateSize, size_t inputSize, size_t timeSteps,
                                                    bool rememberState = false,
                                                    EActivationFunction f = EActivationFunction::kTanh);

   /*! Adds an already-created recurrent layer to the network. */
   void AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer);

   /*! Adds a fully-connected layer of the given width; the input width is taken
       from the previous layer (or from the net input for the first layer). */
   TDenseLayer<Architecture_t> *AddDenseLayer(size_t width, EActivationFunction f, Scalar_t dropoutProbability = 1.0);

   /*! Adds an already-created dense layer to the network. */
   void AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer);

   /*! Adds a reshape layer; with flattening == true the output is flattened to
       a single row per event. */
   TReshapeLayer<Architecture_t> *AddReshapeLayer(size_t depth, size_t height, size_t width, bool flattening);

   /*! Adds a batch-normalization layer. */
   TBatchNormLayer<Architecture_t> *AddBatchNormLayer(Scalar_t momentum = -1, Scalar_t epsilon = 0.0001);

   /*! Adds an already-created reshape layer to the network. */
   void AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer);

   /*! Adds a corruption layer (denoising-autoencoder input corruption). */
   TCorruptionLayer<Architecture_t> *AddCorruptionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                        Scalar_t dropoutProbability, Scalar_t corruptionLevel);

   /*! Adds an already-created corruption layer to the network. */
   void AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer);

   /*! Adds a compression layer (autoencoder encoding step). */
   TCompressionLayer<Architecture_t> *AddCompressionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                          Scalar_t dropoutProbability, EActivationFunction f,
                                                          std::vector<Matrix_t> weights, std::vector<Matrix_t> biases);

   /*! Adds an already-created compression layer to the network. */
   void AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer);

   /*! Adds a reconstruction layer (autoencoder decoding step). */
   TReconstructionLayer<Architecture_t> *AddReconstructionLayer(size_t visibleUnits, size_t hiddenUnits,
                                                                Scalar_t learningRate, EActivationFunction f,
                                                                std::vector<Matrix_t> weights,
                                                                std::vector<Matrix_t> biases, Scalar_t corruptionLevel,
                                                                Scalar_t dropoutProbability);

   /*! Adds an already-created reconstruction layer to the network. */
   void AddReconstructionLayer(TReconstructionLayer<Architecture_t> *reconstructionLayer);

   /*! Adds a logistic-regression output layer. */
   TLogisticRegressionLayer<Architecture_t> *AddLogisticRegressionLayer(size_t inputUnits, size_t outputUnits,
                                                                        size_t testDataBatchSize,
                                                                        Scalar_t learningRate);

   /*! Adds an already-created logistic-regression layer to the network. */
   void AddLogisticRegressionLayer(TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer);

   /*! Greedy layer-wise pre-training of a stack of denoising autoencoders. */
   void PreTrain(std::vector<Matrix_t> &input, std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs, EActivationFunction f,
                 bool applyDropout = false);

   /*! Supervised fine-tuning after pre-training, via a logistic-regression
       output layer. */
   void FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput, std::vector<Matrix_t> &outputLabel,
                 size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate, size_t epochs);

   /*! Propagates the given input tensor forward through the network. */
   void Forward(Tensor_t &input, bool applyDropout = false);

   /*! Resets the training state of every layer. */
   void ResetTraining();

   /*! Back-propagates the error, starting from the loss gradients of the last
       layer, and computes the weight and bias gradients of every layer. */
   void Backward(const Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights);

#ifdef USE_PARALLEL_DEEPNET
   /*! Forward pass over several net replicas, one batch each. */
   void ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                        std::vector<TTensorBatch<Architecture_t>> &batches, bool applyDropout = false);

   /*! Backward pass over several net replicas, followed by a plain SGD update
       of the master net. */
   void ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                         std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate);

   /*! As ParallelBackward, but with momentum. */
   void ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);

   /*! As ParallelBackward, but with Nesterov momentum. */
   void ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                 std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t learningRate,
                                 Scalar_t momentum);
#endif // USE_PARALLEL_DEEPNET

   /*! Updates the weights and biases of every layer with the given learning rate. */
   void Update(Scalar_t learningRate);

   /*! Evaluates the loss for the already-computed network output, optionally
       including the regularization term. */
   Scalar_t Loss(const Matrix_t &groundTruth, const Matrix_t &weights, bool includeRegularization = true) const;

   /*! Runs a forward pass on the given input and then evaluates the loss. */
   Scalar_t Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights, bool inTraining = false,
                 bool includeRegularization = true);

   /*! Computes the regularization term, summed over all layer weights. */
   Scalar_t RegularizationTerm() const;

   /*! Fills `predictions` by applying the output function f to the last layer's output. */
   void Prediction(Matrix_t &predictions, EOutputFunction f) const;

   /*! Runs a forward pass on `input`, then fills `predictions`. */
   void Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f);

   /* Getters */
   inline Layer_t *GetLayerAt(size_t i) { return fLayers[i]; }
   inline const Layer_t *GetLayerAt(size_t i) const { return fLayers[i]; }

   inline size_t GetDepth() const { return fLayers.size(); }
   inline size_t GetOutputWidth() const { return fLayers.back()->GetWidth(); }

   inline std::vector<Layer_t *> &GetLayers() { return fLayers; }
   inline const std::vector<Layer_t *> &GetLayers() const { return fLayers; }

   /*! Removes all layers from the network. */
   inline void Clear() { fLayers.clear(); }

   inline size_t GetBatchSize() const { return fBatchSize; }
   inline size_t GetInputDepth() const { return fInputDepth; }
   inline size_t GetInputHeight() const { return fInputHeight; }
   inline size_t GetInputWidth() const { return fInputWidth; }

   inline size_t GetBatchDepth() const { return fBatchDepth; }
   inline size_t GetBatchHeight() const { return fBatchHeight; }
   inline size_t GetBatchWidth() const { return fBatchWidth; }

   inline bool IsTraining() const { return fIsTraining; }

   inline ELossFunction GetLossFunction() const { return fJ; }
   inline EInitialization GetInitialization() const { return fI; }
   inline ERegularization GetRegularization() const { return fR; }
   inline Scalar_t GetWeightDecay() const { return fWeightDecay; }

   /* Setters */
   inline void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
   inline void SetInputDepth(size_t inputDepth) { fInputDepth = inputDepth; }
   inline void SetInputHeight(size_t inputHeight) { fInputHeight = inputHeight; }
   inline void SetInputWidth(size_t inputWidth) { fInputWidth = inputWidth; }
   inline void SetBatchDepth(size_t batchDepth) { fBatchDepth = batchDepth; }
   inline void SetBatchHeight(size_t batchHeight) { fBatchHeight = batchHeight; }
   inline void SetBatchWidth(size_t batchWidth) { fBatchWidth = batchWidth; }
   inline void SetLossFunction(ELossFunction J) { fJ = J; }
   inline void SetInitialization(EInitialization I) { fI = I; }
   inline void SetRegularization(ERegularization R) { fR = R; }
   inline void SetWeightDecay(Scalar_t weightDecay) { fWeightDecay = weightDecay; }

   /*! Sets per-layer dropout probabilities; layers beyond the vector's size get 1.0. */
   void SetDropoutProbabilities(const std::vector<Double_t> &probabilities);
};

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet()
   : fLayers(), fBatchSize(0), fInputDepth(0), fInputHeight(0), fInputWidth(0), fBatchDepth(0), fBatchHeight(0),
     fBatchWidth(0), fJ(ELossFunction::kMeanSquaredError), fI(EInitialization::kZero), fR(ERegularization::kNone),
     fIsTraining(true), fWeightDecay(0.0)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(size_t batchSize, size_t inputDepth, size_t inputHeight, size_t inputWidth,
                                            size_t batchDepth, size_t batchHeight, size_t batchWidth, ELossFunction J,
                                            EInitialization I, ERegularization R, Scalar_t weightDecay, bool isTraining)
   : fLayers(), fBatchSize(batchSize), fInputDepth(inputDepth), fInputHeight(inputHeight), fInputWidth(inputWidth),
     fBatchDepth(batchDepth), fBatchHeight(batchHeight), fBatchWidth(batchWidth), fIsTraining(isTraining), fJ(J), fI(I),
     fR(R), fWeightDecay(weightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::TDeepNet(const TDeepNet &deepNet)
   : fLayers(), fBatchSize(deepNet.fBatchSize), fInputDepth(deepNet.fInputDepth), fInputHeight(deepNet.fInputHeight),
     fInputWidth(deepNet.fInputWidth), fBatchDepth(deepNet.fBatchDepth), fBatchHeight(deepNet.fBatchHeight),
     fBatchWidth(deepNet.fBatchWidth), fIsTraining(deepNet.fIsTraining), fJ(deepNet.fJ), fI(deepNet.fI), fR(deepNet.fR),
     fWeightDecay(deepNet.fWeightDecay)
{
}

template <typename Architecture_t, typename Layer_t>
TDeepNet<Architecture_t, Layer_t>::~TDeepNet()
{
   // Release the layers' memory
   for (auto layer : fLayers)
      delete layer;
   fLayers.clear();
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::calculateDimension(int imgDim, int fltDim, int padding, int stride) -> size_t
{
   // Compute in floating point so that a fractional dimension is caught below
   Scalar_t dimension = (Scalar_t(imgDim - fltDim + 2 * padding) / stride) + 1;
   if (!isInteger(dimension) || dimension <= 0) {
      int iLayer = fLayers.size();
      Fatal("calculateDimension",
            "Incompatible hyper-parameters for layer %d - (imageDim, filterDim, padding, stride): %d, %d, %d, %d",
            iLayer, imgDim, fltDim, padding, stride);
   }
   return (size_t)dimension;
}
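
// Worked example for calculateDimension: a 32x32 input with a 5x5 filter,
// padding 2 and stride 1 gives (32 - 5 + 2*2) / 1 + 1 = 32, so the spatial size
// is preserved; with stride 2 the same setup gives 31 / 2 + 1 = 16.5, a
// non-integer dimension that the check above rejects with Fatal.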

template <typename Architecture_t, typename Layer_t>
TConvLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddConvLayer(size_t depth, size_t filterHeight,
                                                                            size_t filterWidth, size_t strideRows,
                                                                            size_t strideCols, size_t paddingHeight,
                                                                            size_t paddingWidth, EActivationFunction f,
                                                                            Scalar_t dropoutProbability)
{
   // All variables defining a convolutional layer
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   // Create the conv layer
   TConvLayer<Architecture_t> *convLayer = new TConvLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, depth, init, filterHeight, filterWidth, strideRows,
      strideCols, paddingHeight, paddingWidth, dropoutProbability, f, reg, decay);

   fLayers.push_back(convLayer);
   return convLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddConvLayer(TConvLayer<Architecture_t> *convLayer)
{
   fLayers.push_back(convLayer);
}

template <typename Architecture_t, typename Layer_t>
TMaxPoolLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(size_t frameHeight, size_t frameWidth,
                                                                                  size_t strideRows, size_t strideCols,
                                                                                  Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   TMaxPoolLayer<Architecture_t> *maxPoolLayer = new TMaxPoolLayer<Architecture_t>(
      batchSize, inputDepth, inputHeight, inputWidth, frameHeight, frameWidth, strideRows, strideCols,
      dropoutProbability);

   fLayers.push_back(maxPoolLayer);
   return maxPoolLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddMaxPoolLayer(TMaxPoolLayer<Architecture_t> *maxPoolLayer)
{
   fLayers.push_back(maxPoolLayer);
}

template <typename Architecture_t, typename Layer_t>
TBasicRNNLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(size_t stateSize, size_t inputSize,
                                                                                    size_t timeSteps,
                                                                                    bool rememberState,
                                                                                    EActivationFunction f)
{
   // Check that the input and time sizes are consistent with the batch layout
   size_t inputHeight, inputWidth, inputDepth;
   if (fLayers.size() == 0) {
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      inputDepth = this->GetInputDepth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      inputDepth = lastLayer->GetDepth();
   }
   if (inputSize != inputWidth) {
      Error("AddBasicRNNLayer", "Inconsistent input size with input layout - it should be %zu instead of %zu",
            inputSize, inputWidth);
   }
   if (timeSteps != inputHeight || timeSteps != inputDepth) {
      Error("AddBasicRNNLayer", "Inconsistent time steps with input layout - it should be %zu instead of %zu",
            timeSteps, inputHeight);
   }

   TBasicRNNLayer<Architecture_t> *basicRNNLayer =
      new TBasicRNNLayer<Architecture_t>(this->GetBatchSize(), stateSize, inputSize, timeSteps, rememberState,
                                         f, fIsTraining, this->GetInitialization());
   fLayers.push_back(basicRNNLayer);
   return basicRNNLayer;
}
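
// Layout sketch (assumed convention, following the checks above): the incoming
// batch must satisfy inputWidth == inputSize and inputHeight == timeSteps. For
// example, a sequence of 10 time steps with 5 features per step:
//    net.AddBasicRNNLayer(/*stateSize=*/8, /*inputSize=*/5, /*timeSteps=*/10);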

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddBasicRNNLayer(TBasicRNNLayer<Architecture_t> *basicRNNLayer)
{
   fLayers.push_back(basicRNNLayer);
}

template <typename Architecture_t, typename Layer_t>
TCorruptionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(size_t visibleUnits,
                                                                                        size_t hiddenUnits,
                                                                                        Scalar_t dropoutProbability,
                                                                                        Scalar_t corruptionLevel)
{
   size_t batchSize = this->GetBatchSize();

   TCorruptionLayer<Architecture_t> *corruptionLayer =
      new TCorruptionLayer<Architecture_t>(batchSize, visibleUnits, hiddenUnits, dropoutProbability, corruptionLevel);
   fLayers.push_back(corruptionLayer);
   return corruptionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCorruptionLayer(TCorruptionLayer<Architecture_t> *corruptionLayer)
{
   fLayers.push_back(corruptionLayer);
}

template <typename Architecture_t, typename Layer_t>
TCompressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t dropoutProbability, EActivationFunction f,
   std::vector<Matrix_t> weights, std::vector<Matrix_t> biases)
{
   size_t batchSize = this->GetBatchSize();

   TCompressionLayer<Architecture_t> *compressionLayer = new TCompressionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, dropoutProbability, f, weights, biases);
   fLayers.push_back(compressionLayer);
   return compressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddCompressionLayer(TCompressionLayer<Architecture_t> *compressionLayer)
{
   fLayers.push_back(compressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TReconstructionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   size_t visibleUnits, size_t hiddenUnits, Scalar_t learningRate, EActivationFunction f, std::vector<Matrix_t> weights,
   std::vector<Matrix_t> biases, Scalar_t corruptionLevel, Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();

   TReconstructionLayer<Architecture_t> *reconstructionLayer = new TReconstructionLayer<Architecture_t>(
      batchSize, visibleUnits, hiddenUnits, learningRate, f, weights, biases, corruptionLevel, dropoutProbability);
   fLayers.push_back(reconstructionLayer);
   return reconstructionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReconstructionLayer(
   TReconstructionLayer<Architecture_t> *reconstructionLayer)
{
   fLayers.push_back(reconstructionLayer);
}

template <typename Architecture_t, typename Layer_t>
TLogisticRegressionLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   size_t inputUnits, size_t outputUnits, size_t testDataBatchSize, Scalar_t learningRate)
{
   size_t batchSize = this->GetBatchSize();

   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer =
      new TLogisticRegressionLayer<Architecture_t>(batchSize, inputUnits, outputUnits, testDataBatchSize, learningRate);
   fLayers.push_back(logisticRegressionLayer);
   return logisticRegressionLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddLogisticRegressionLayer(
   TLogisticRegressionLayer<Architecture_t> *logisticRegressionLayer)
{
   fLayers.push_back(logisticRegressionLayer);
}

template <typename Architecture_t, typename Layer_t>
TDenseLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(size_t width, EActivationFunction f,
                                                                              Scalar_t dropoutProbability)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputWidth;
   EInitialization init = this->GetInitialization();
   ERegularization reg = this->GetRegularization();
   Scalar_t decay = this->GetWeightDecay();

   if (fLayers.size() == 0) {
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputWidth = lastLayer->GetWidth();
   }

   TDenseLayer<Architecture_t> *denseLayer =
      new TDenseLayer<Architecture_t>(batchSize, inputWidth, width, init, dropoutProbability, f, reg, decay);

   fLayers.push_back(denseLayer);
   return denseLayer;
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddDenseLayer(TDenseLayer<Architecture_t> *denseLayer)
{
   fLayers.push_back(denseLayer);
}

template <typename Architecture_t, typename Layer_t>
TReshapeLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(size_t depth, size_t height,
                                                                                  size_t width, bool flattening)
{
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth;
   size_t inputHeight;
   size_t inputWidth;
   size_t outputNSlices;
   size_t outputNRows;
   size_t outputNCols;

   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
   }

   if (flattening) {
      outputNSlices = 1;
      outputNRows = this->GetBatchSize();
      outputNCols = depth * height * width;
      size_t inputNCols = inputDepth * inputHeight * inputWidth;
      if (outputNCols != 0 && outputNCols != inputNCols) {
         Info("AddReshapeLayer",
              "Dimensions not compatible - product of input %zu x %zu x %zu should be equal to output %zu x %zu x %zu - forcing flattened output to be %zu",
              inputDepth, inputHeight, inputWidth, depth, height, width, inputNCols);
      }
      outputNCols = inputNCols;
      depth = 1;
      height = 1;
      width = outputNCols;
   } else {
      outputNSlices = this->GetBatchSize();
      outputNRows = depth;
      outputNCols = height * width;
   }

   TReshapeLayer<Architecture_t> *reshapeLayer =
      new TReshapeLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, depth, height, width,
                                        outputNSlices, outputNRows, outputNCols, flattening);
   fLayers.push_back(reshapeLayer);
   return reshapeLayer;
}
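
// For example, flattening the 8 x 14 x 14 output of a conv/pool stack yields a
// batchSize x 1568 matrix (8 * 14 * 14 = 1568), ready to feed a dense layer:
//    net.AddReshapeLayer(8, 14, 14, /*flattening=*/true);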

template <typename Architecture_t, typename Layer_t>
TBatchNormLayer<Architecture_t> *TDeepNet<Architecture_t, Layer_t>::AddBatchNormLayer(Scalar_t momentum,
                                                                                      Scalar_t epsilon)
{
   int axis = -1;
   size_t batchSize = this->GetBatchSize();
   size_t inputDepth = 0;
   size_t inputHeight = 0;
   size_t inputWidth = 0;

   // Default shape corresponds to a dense layout: (batchSize, inputWidth, 1)
   std::vector<size_t> shape = {1, 1, 1};
   if (fLayers.size() == 0) {
      inputDepth = this->GetInputDepth();
      inputHeight = this->GetInputHeight();
      inputWidth = this->GetInputWidth();
      shape[0] = batchSize;
      shape[1] = inputWidth;
   } else {
      Layer_t *lastLayer = fLayers.back();
      inputDepth = lastLayer->GetDepth();
      inputHeight = lastLayer->GetHeight();
      inputWidth = lastLayer->GetWidth();
      shape = lastLayer->GetOutput().GetShape();
      if (dynamic_cast<TConvLayer<Architecture_t> *>(lastLayer) != nullptr ||
          dynamic_cast<TMaxPoolLayer<Architecture_t> *>(lastLayer) != nullptr)
         axis = 1; // normalize along the channel axis after a conv or pool layer
      if (shape.size() > 3) {
         for (size_t i = 3; i < shape.size(); ++i)
            shape[2] *= shape[i];
      }
   }
   std::cout << "addBNormLayer " << inputDepth << " , " << inputHeight << " , " << inputWidth << " , " << shape[0]
             << " " << shape[1] << " " << shape[2] << std::endl;

   auto bnormLayer =
      new TBatchNormLayer<Architecture_t>(batchSize, inputDepth, inputHeight, inputWidth, shape, axis, momentum,
                                          epsilon);

   fLayers.push_back(bnormLayer);
   return bnormLayer;
}
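
// Sketch: after a convolutional or pooling layer the normalization axis is the
// channel axis (axis = 1, set above); otherwise axis stays -1 (last axis). The
// defaults in the declaration (momentum = -1, epsilon = 1e-4) presumably select
// a cumulative running average of the batch statistics:
//    net.AddBatchNormLayer();        // default momentum and epsilon
//    net.AddBatchNormLayer(0.99);    // exponential moving average instead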

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::AddReshapeLayer(TReshapeLayer<Architecture_t> *reshapeLayer)
{
   fLayers.push_back(reshapeLayer);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Initialize() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Initialize();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ResetTraining() -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->ResetTraining();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Forward(Tensor_t &input, bool applyDropout) -> void
{
   fLayers.front()->Forward(input, applyDropout);

   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i]->Forward(fLayers[i - 1]->GetOutput(), applyDropout);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::PreTrain(std::vector<Matrix_t> &input,
                                                 std::vector<size_t> numHiddenUnitsPerLayer, Scalar_t learningRate,
                                                 Scalar_t corruptionLevel, Scalar_t dropoutProbability, size_t epochs,
                                                 EActivationFunction f, bool applyDropout) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t numOfHiddenLayers = numHiddenUnitsPerLayer.size();
   size_t visibleUnits = (size_t)input[0].GetNrows();

   // First autoencoder: corrupt the input, encode it, then decode and train the
   // reconstruction
   AddCorruptionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, corruptionLevel);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(input, applyDropout);

   AddCompressionLayer(visibleUnits, numHiddenUnitsPerLayer[0], dropoutProbability, f, fLayers.back()->GetWeights(),
                       fLayers.back()->GetBiases());
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

   AddReconstructionLayer(visibleUnits, numHiddenUnitsPerLayer[0], learningRate, f, fLayers.back()->GetWeights(),
                          fLayers.back()->GetBiases(), corruptionLevel, dropoutProbability);
   fLayers.back()->Initialize();
   fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
   fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                            input);

   size_t weightsSize = fLayers.back()->GetWeights().size();
   size_t biasesSize = fLayers.back()->GetBiases().size();
   for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
      // Copy the decoder's trained weights back into the encoder, then repeat
      for (size_t j = 0; j < weightsSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
      }
      for (size_t j = 0; j < biasesSize; j++) {
         Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
      }
      fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                            fLayers[fLayers.size() - 3]->GetOutput(), input);
   }
   fLayers.back()->Print();

   // Subsequent autoencoders take the previous encoding as their visible units
   for (size_t i = 1; i < numOfHiddenLayers; i++) {
      AddCorruptionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, corruptionLevel);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);

      AddCompressionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], dropoutProbability, f,
                          fLayers.back()->GetWeights(), fLayers.back()->GetBiases());
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);

      AddReconstructionLayer(numHiddenUnitsPerLayer[i - 1], numHiddenUnitsPerLayer[i], learningRate, f,
                             fLayers.back()->GetWeights(), fLayers.back()->GetBiases(), corruptionLevel,
                             dropoutProbability);
      fLayers.back()->Initialize();
      fLayers.back()->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
      fLayers.back()->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1, fLayers[fLayers.size() - 3]->GetOutput(),
                               fLayers[fLayers.size() - 5]->GetOutput());

      size_t _weightsSize = fLayers.back()->GetWeights().size();
      size_t _biasesSize = fLayers.back()->GetBiases().size();
      for (size_t epoch = 0; epoch < epochs - 1; epoch++) {
         for (size_t j = 0; j < _weightsSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetWeightsAt(j), fLayers.back()->GetWeightsAt(j));
         }
         for (size_t j = 0; j < _biasesSize; j++) {
            Architecture_t::Copy(fLayers[fLayers.size() - 2]->GetBiasesAt(j), fLayers.back()->GetBiasesAt(j));
         }
         fLayers[fLayers.size() - 2]->Forward(fLayers[fLayers.size() - 3]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Forward(fLayers[fLayers.size() - 2]->GetOutput(), applyDropout);
         fLayers[fLayers.size() - 1]->Backward(fLayers[fLayers.size() - 2]->GetOutput(), inp1,
                                               fLayers[fLayers.size() - 3]->GetOutput(),
                                               fLayers[fLayers.size() - 5]->GetOutput());
      }
      fLayers.back()->Print();
   }
}
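
// Illustrative call (hypothetical values): greedily pre-train two stacked
// denoising autoencoders of 100 and 50 hidden units with 20% input corruption:
//    net.PreTrain(trainData, {100, 50}, /*learningRate=*/0.1,
//                 /*corruptionLevel=*/0.2, /*dropoutProbability=*/1.0,
//                 /*epochs=*/10, EActivationFunction::kSigmoid);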

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::FineTune(std::vector<Matrix_t> &input, std::vector<Matrix_t> &testInput,
                                                 std::vector<Matrix_t> &inputLabel, size_t outputUnits,
                                                 size_t testDataBatchSize, Scalar_t learningRate, size_t epochs) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   if (fLayers.size() == 0) { // only a logistic-regression classifier
      size_t inputUnits = input[0].GetNrows();

      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, input, inp2);
      }
      fLayers.back()->Forward(input, false);
      fLayers.back()->Print();
   } else { // pre-trained layers plus classifier
      size_t inputUnits = fLayers.back()->GetOutputAt(0).GetNrows();
      AddLogisticRegressionLayer(inputUnits, outputUnits, testDataBatchSize, learningRate);
      fLayers.back()->Initialize();
      for (size_t i = 0; i < epochs; i++) {
         fLayers.back()->Backward(inputLabel, inp1, fLayers[fLayers.size() - 2]->GetOutput(), inp2);
      }
      fLayers.back()->Forward(testInput, false);
      fLayers.back()->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Backward(const Tensor_t &input, const Matrix_t &groundTruth,
                                                 const Matrix_t &weights) -> void
{
   // Evaluate the loss gradients with respect to the last layer's output
   Matrix_t last_actgrad = fLayers.back()->GetActivationGradientsAt(0);
   Matrix_t last_output = fLayers.back()->GetOutputAt(0);
   evaluateGradients<Architecture_t>(last_actgrad, this->GetLossFunction(), groundTruth, last_output, weights);

   for (size_t i = fLayers.size() - 1; i > 0; i--) {
      auto &activation_gradient_backward = fLayers[i - 1]->GetActivationGradients();
      auto &activations_backward = fLayers[i - 1]->GetOutput();
      fLayers[i]->Backward(activation_gradient_backward, activations_backward);
   }

   // The first layer receives an empty dummy tensor, since no activation
   // gradients need to be propagated back to the input
   Tensor_t dummy;
   fLayers[0]->Backward(dummy, input);
}
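
// A single SGD step then chains these calls (a sketch; `net`, `batchInput`,
// `truth` and `eventWeights` are assumed to exist and be properly sized):
//    net.Forward(batchInput, /*applyDropout=*/true);
//    net.Backward(batchInput, truth, eventWeights);
//    net.Update(/*learningRate=*/0.01);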

#ifdef USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelForward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                        std::vector<TTensorBatch<Architecture_t>> &batches,
                                                        bool applyDropout) -> void
{
   size_t depth = this->GetDepth();

   // The first layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Forward(batches[i].GetInput(), applyDropout);
   }

   // The i'th layer of each deep net
   for (size_t i = 1; i < depth; i++) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Forward(nets[j].GetLayerAt(i - 1)->GetOutput(), applyDropout);
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackward(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                         std::vector<TTensorBatch<Architecture_t>> &batches,
                                                         Scalar_t learningRate) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         nets[j].GetLayerAt(i)->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(),
                                         nets[j].GetLayerAt(i - 1)->GetOutput(), inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      nets[i].GetLayerAt(0)->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Update the master net with each replica's gradients and synchronize back
   for (size_t i = 0; i < nets.size(); i++) {
      for (size_t j = 0; j < depth; j++) {
         Layer_t *masterLayer = this->GetLayerAt(j);
         Layer_t *layer = nets[i].GetLayerAt(j);

         masterLayer->UpdateWeights(layer->GetWeightGradients(), learningRate);
         layer->CopyWeights(masterLayer->GetWeights());

         masterLayer->UpdateBiases(layer->GetBiasGradients(), learningRate);
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardMomentum(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every layer of each deep net, accumulating
   // momentum-weighted gradients in the master net
   for (size_t i = depth - 1; i > 0; i--) {
      Layer_t *masterLayer = this->GetLayerAt(i);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   Layer_t *masterFirstLayer = this->GetLayerAt(0);
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);

      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);

      masterFirstLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
      masterFirstLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
   }

   masterFirstLayer->UpdateWeightGradients(masterFirstLayer->GetWeightGradients(), 1.0 - momentum);
   masterFirstLayer->UpdateBiasGradients(masterFirstLayer->GetBiasGradients(), 1.0 - momentum);

   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      masterLayer->Update(1.0);

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());
      }
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::ParallelBackwardNestorov(std::vector<TDeepNet<Architecture_t, Layer_t>> &nets,
                                                                 std::vector<TTensorBatch<Architecture_t>> &batches,
                                                                 Scalar_t learningRate, Scalar_t momentum) -> void
{
   std::cout << "Parallel Backward Nestorov" << std::endl;
   std::vector<Matrix_t> inp1;
   std::vector<Matrix_t> inp2;
   size_t depth = this->GetDepth();

   // Evaluate the loss gradients of the last layer in each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      evaluateGradients<Architecture_t>(nets[i].GetLayerAt(depth - 1)->GetActivationGradientsAt(0),
                                        nets[i].GetLossFunction(), batches[i].GetOutput(),
                                        nets[i].GetLayerAt(depth - 1)->GetOutputAt(0), batches[i].GetWeights());
   }

   // Backpropagate the error in every layer of each deep net
   for (size_t i = depth - 1; i > 0; i--) {
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->Backward(nets[j].GetLayerAt(i - 1)->GetActivationGradients(), nets[j].GetLayerAt(i - 1)->GetOutput(),
                         inp1, inp2);
      }
   }

   std::vector<Matrix_t> dummy;

   // First layer of each deep net
   for (size_t i = 0; i < nets.size(); i++) {
      Layer_t *layer = nets[i].GetLayerAt(0);
      layer->Backward(dummy, batches[i].GetInput(), inp1, inp2);
   }

   // Look-ahead step: replicas are synchronized to the master weights, moved
   // along the master gradients, then the master accumulates the new gradients
   for (size_t i = 0; i < depth; i++) {
      Layer_t *masterLayer = this->GetLayerAt(i);
      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         layer->CopyWeights(masterLayer->GetWeights());
         layer->CopyBiases(masterLayer->GetBiases());

         layer->UpdateWeights(masterLayer->GetWeightGradients(), 1.0);
         layer->UpdateBiases(masterLayer->GetBiasGradients(), 1.0);
      }

      for (size_t j = 0; j < nets.size(); j++) {
         Layer_t *layer = nets[j].GetLayerAt(i);

         masterLayer->UpdateWeightGradients(layer->GetWeightGradients(), learningRate / momentum);
         masterLayer->UpdateBiasGradients(layer->GetBiasGradients(), learningRate / momentum);
      }

      masterLayer->UpdateWeightGradients(masterLayer->GetWeightGradients(), 1.0 - momentum);
      masterLayer->UpdateBiasGradients(masterLayer->GetBiasGradients(), 1.0 - momentum);

      masterLayer->Update(1.0);
   }
}

#endif // USE_PARALLEL_DEEPNET

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Update(Scalar_t learningRate) -> void
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      fLayers[i]->Update(learningRate);
   }
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool includeRegularization) const -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(this->GetLossFunction(), groundTruth, fLayers.back()->GetOutputAt(0), weights);

   includeRegularization &= (this->GetRegularization() != ERegularization::kNone);
   if (includeRegularization) {
      loss += RegularizationTerm();
   }

   return loss;
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Loss(Tensor_t &input, const Matrix_t &groundTruth, const Matrix_t &weights,
                                             bool inTraining, bool includeRegularization) -> Scalar_t
{
   Forward(input, inTraining);
   return Loss(groundTruth, weights, includeRegularization);
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::RegularizationTerm() const -> Scalar_t
{
   Scalar_t reg = 0.0;
   for (size_t i = 0; i < fLayers.size(); i++) {
      for (size_t j = 0; j < (fLayers[i]->GetWeights()).size(); j++) {
         reg += regularization<Architecture_t>(fLayers[i]->GetWeightsAt(j), this->GetRegularization());
      }
   }
   return this->GetWeightDecay() * reg;
}
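
// In formula form: L_total = L_data + lambda * sum_l R(W_l), where lambda is
// the weight-decay factor and R the norm chosen via ERegularization. Assuming
// kL2 is a plain sum of squares, a single weight of 0.5 with lambda = 0.01
// contributes 0.01 * 0.25 = 0.0025 to the loss.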

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, EOutputFunction f) const -> void
{
   // The prediction is the last layer's output, mapped through f
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Prediction(Matrix_t &predictions, Tensor_t &input, EOutputFunction f) -> void
{
   Forward(input, false);
   evaluate<Architecture_t>(predictions, f, fLayers.back()->GetOutputAt(0));
}

template <typename Architecture_t, typename Layer_t>
auto TDeepNet<Architecture_t, Layer_t>::Print() const -> void
{
   std::cout << "DEEP NEURAL NETWORK:  Depth = " << this->GetDepth();
   std::cout << " Input = ( " << this->GetInputDepth();
   std::cout << ", " << this->GetInputHeight();
   std::cout << ", " << this->GetInputWidth() << " )";
   std::cout << " Batch size = " << this->GetBatchSize();
   std::cout << " Loss function = " << static_cast<char>(this->GetLossFunction()) << std::endl;

   for (size_t i = 0; i < fLayers.size(); i++) {
      std::cout << "\tLayer " << i << "\t";
      fLayers[i]->Print();
   }
}

template <typename Architecture_t, typename Layer_t>
void TDeepNet<Architecture_t, Layer_t>::SetDropoutProbabilities(const std::vector<Double_t> &probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i]->SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i]->SetDropoutProbability(1.0);
      }
   }
}
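
// Example (hypothetical values): keep 80% of the units in the first two layers
// and disable dropout everywhere else:
//    net.SetDropoutProbabilities({0.8, 0.8}); // remaining layers default to 1.0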