ClassImp(TMVA::MethodDL);

using namespace TMVA::DNN::CNN;
using namespace TMVA::DNN;

using TMVA::DNN::EActivationFunction;
using TMVA::DNN::ELossFunction;
using TMVA::DNN::EInitialization;
using TMVA::DNN::EOutputFunction;
using TMVA::DNN::EOptimizer;
////////////////////////////////////////////////////////////////////////////////
/// Look up a key in the option map; return an empty string if it is missing.
TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
{
   std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
   if (it == keyValueMap.end()) {
      return TString("");
   }
   return it->second;
}

template <typename T>
T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);

/// Specialization for int.
template <>
int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   if (value == "") {
      return defaultValue;
   }
   return value.Atoi();
}

/// Specialization for double.
template <>
double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   if (value == "") {
      return defaultValue;
   }
   return value.Atof();
}

/// Specialization for TString.
template <>
TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   if (value == "") {
      return defaultValue;
   }
   return value;
}

/// Specialization for bool.
template <>
bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
{
   TString value(fetchValueTmp(keyValueMap, key));
   if (value == "") {
      return defaultValue;
   }
   value.ToUpper();
   if (value == "TRUE" || value == "T" || value == "1") {
      return true;
   }
   return false;
}

/// Specialization for a '+'-separated list of doubles (e.g. "0.0+0.5+0.5").
template <>
std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
                                  std::vector<double> defaultValue)
{
   TString parseString(fetchValueTmp(keyValueMap, key));
   if (parseString == "") {
      return defaultValue;
   }
   parseString.ToUpper();
   std::vector<double> values;

   const TString tokenDelim("+");
   TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
   TIter nextToken(tokenStrings);
   TObjString *tokenString = (TObjString *)nextToken();
   for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
      std::stringstream sstr;
      double currentValue;
      sstr << tokenString->GetString().Data();
      sstr >> currentValue;
      values.push_back(currentValue);
   }
   return values;
}
////////////////////////////////////////////////////////////////////////////////
/// The option handling methods.
void MethodDL::DeclareOptions()
{
   DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");
   DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");
   DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout",
                    "Layout of the network.");
   DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy",
                    "Loss function: Mean squared error (regression)"
                    " or cross entropy (binary classification).");
   AddPreDefVal(TString("CROSSENTROPY"));
   AddPreDefVal(TString("SUMOFSQUARES"));
   AddPreDefVal(TString("MUTUALEXCLUSIVE"));

   DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization",
                    "Weight initialization strategy");
   AddPreDefVal(TString("XAVIER"));
   AddPreDefVal(TString("XAVIERUNIFORM"));
   AddPreDefVal(TString("GAUSS"));
   AddPreDefVal(TString("UNIFORM"));
   AddPreDefVal(TString("IDENTITY"));
   AddPreDefVal(TString("ZERO"));

   DeclareOptionRef(fRandomSeed = 0, "RandomSeed",
                    "Random seed used for weight initialization and batch shuffling");

   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize",
                    "Part of the training data to use for validation. "
                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events. (Default: 20%)");

   DeclareOptionRef(fArchitectureString = "CPU", "Architecture",
                    "Which architecture to perform the training on.");
   AddPreDefVal(TString("STANDARD"));
   AddPreDefVal(TString("CPU"));
   AddPreDefVal(TString("GPU"));
   AddPreDefVal(TString("OPENCL"));
   AddPreDefVal(TString("CUDNN"));

   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                              "ConvergenceSteps=50,"
                                              "Regularization=None,"
                                              "ConvergenceSteps=50,"
                                              "DropConfig=0.0+0.5+0.5,"
                                              "Multithreading=True",
                    "TrainingStrategy", "Defines the training strategies.");
}
////////////////////////////////////////////////////////////////////////////////
void MethodDL::ProcessOptions()
{
   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO << "Will ignore negative events in training!" << Endl;
   }

   if (fArchitectureString == "STANDARD") {
      Log() << kINFO << "The STANDARD architecture has been deprecated. "
                        "Please use Architecture=CPU or Architecture=GPU. "
                        "See the TMVA Users' Guide for instructions if you "
                        "encounter problems." << Endl;
   }
   if (fArchitectureString == "OPENCL") {
      Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
                         "Please use Architecture=CPU or Architecture=GPU for the "
                         "time being. See the TMVA Users' Guide for instructions "
                         "if you encounter problems." << Endl;
   }

   // the selected architecture is checked against the backends that were built
   if (fArchitectureString == "GPU") {
#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
      Log() << kERROR << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE by using -Dtmva-gpu=On " << Endl;
#ifdef R__HAS_TMVACPU
      fArchitectureString = "CPU";
      Log() << kINFO << "Will now use the CPU architecture !" << Endl;
#else
      fArchitectureString = "Standard";
      Log() << kINFO << "Will now use the Standard architecture !" << Endl;
#endif
#else
      Log() << kINFO << "Will now use the GPU architecture !" << Endl;
#endif
   } else if (fArchitectureString == "CUDNN") {
#ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
      Log() << kERROR << "CUDA+CUDNN backend not enabled. Please make sure "
                         "you have CUDNN and CUDA installed and that the GPU capability/CUDA "
                         "was successfully detected by CMAKE by using -Dtmva-gpu=On" << Endl;
#ifdef R__HAS_TMVACPU
      fArchitectureString = "CPU";
      Log() << kINFO << "Will now use the CPU architecture !" << Endl;
#else
      fArchitectureString = "Standard";
      Log() << kINFO << "Will now use the Standard architecture !" << Endl;
#endif
#else
      Log() << kINFO << "Will now use the GPU architecture !" << Endl;
#endif
   } else if (fArchitectureString == "CPU") {
#ifndef R__HAS_TMVACPU // TMVA has no CPU support
      Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake as well that the imt CMake flag is set." << Endl;
#ifdef R__HAS_TMVAGPU
      fArchitectureString = "GPU";
      Log() << kINFO << "Will now use the GPU architecture !" << Endl;
#else
      fArchitectureString = "STANDARD";
      Log() << kINFO << "Will now use the Standard architecture !" << Endl;
#endif
#else
      Log() << kINFO << "Will now use the CPU architecture !" << Endl;
#endif
   } else {
      Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
      fArchitectureString = "STANDARD";
   }
   // Set the output and loss functions according to the analysis type
   fOutputFunction = EOutputFunction::kSigmoid;
   if (fAnalysisType == Types::kClassification) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fLossFunction = ELossFunction::kMeanSquaredError;
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fLossFunction = ELossFunction::kCrossEntropy;
      }
      fOutputFunction = EOutputFunction::kSigmoid;
   } else if (fAnalysisType == Types::kRegression) {
      if (fErrorStrategy != "SUMOFSQUARES") {
         Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
               << " neural net error function. Setting error function to "
               << " SUMOFSQUARES now." << Endl;
      }
      fLossFunction = ELossFunction::kMeanSquaredError;
      fOutputFunction = EOutputFunction::kIdentity;
   } else if (fAnalysisType == Types::kMulticlass) {
      if (fErrorStrategy == "SUMOFSQUARES") {
         fLossFunction = ELossFunction::kMeanSquaredError;
      }
      if (fErrorStrategy == "CROSSENTROPY") {
         fLossFunction = ELossFunction::kCrossEntropy;
      }
      if (fErrorStrategy == "MUTUALEXCLUSIVE") {
         fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
      }
      fOutputFunction = EOutputFunction::kSoftmax;
   }

   // Weight initialization strategy
   if (fWeightInitializationString == "XAVIER") {
      fWeightInitialization = DNN::EInitialization::kGlorotNormal;
   } else if (fWeightInitializationString == "XAVIERUNIFORM") {
      fWeightInitialization = DNN::EInitialization::kGlorotUniform;
   } else if (fWeightInitializationString == "GAUSS") {
      fWeightInitialization = DNN::EInitialization::kGauss;
   } else if (fWeightInitializationString == "UNIFORM") {
      fWeightInitialization = DNN::EInitialization::kUniform;
   } else if (fWeightInitializationString == "ZERO") {
      fWeightInitialization = DNN::EInitialization::kZero;
   } else if (fWeightInitializationString == "IDENTITY") {
      fWeightInitialization = DNN::EInitialization::kIdentity;
   } else {
      fWeightInitialization = DNN::EInitialization::kGlorotUniform;
   }
   // Parse the training strategy string: blocks are separated by '|', key=value pairs by ','
   KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
   for (auto &block : strategyKeyValues) {
      TTrainingSettings settings;

      settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
      settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
      settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
      settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
      settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
      settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
      settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
      settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());

      TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
      if (regularization == "L1") {
         settings.regularization = DNN::ERegularization::kL1;
      } else if (regularization == "L2") {
         settings.regularization = DNN::ERegularization::kL2;
      } else {
         settings.regularization = DNN::ERegularization::kNone;
      }

      TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
      settings.optimizerName = optimizer;
      if (optimizer == "SGD") {
         settings.optimizer = DNN::EOptimizer::kSGD;
      } else if (optimizer == "ADAM") {
         settings.optimizer = DNN::EOptimizer::kAdam;
      } else if (optimizer == "ADAGRAD") {
         settings.optimizer = DNN::EOptimizer::kAdagrad;
      } else if (optimizer == "RMSPROP") {
         settings.optimizer = DNN::EOptimizer::kRMSProp;
      } else if (optimizer == "ADADELTA") {
         settings.optimizer = DNN::EOptimizer::kAdadelta;
      } else {
         // unknown optimizer: fall back to ADAM
         settings.optimizer = DNN::EOptimizer::kAdam;
         settings.optimizerName = "ADAM";
      }

      TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
      if (strMultithreading.BeginsWith("T")) {
         settings.multithreading = true;
      } else {
         settings.multithreading = false;
      }

      fTrainingSettings.push_back(settings);
   }
   this->SetBatchSize(fTrainingSettings.front().batchSize);

   // input layout not given: use the number of input variables as width
   if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
      fInputShape[3] = GetNVariables();
   }
   // batch layout not given: deduce it from the input layout and the batch size
   if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
      if (fInputShape[2] == 1 && fInputShape[1] == 1) {
         // 1D input: the batch is a 2D tensor (batch size x number of variables)
         fBatchDepth = 1;
         fBatchHeight = fTrainingSettings.front().batchSize;
         fBatchWidth = fInputShape[3];
      } else {
         fBatchDepth = fTrainingSettings.front().batchSize;
         fBatchHeight = fInputShape[1];
         fBatchWidth = fInputShape[3] * fInputShape[2];
      }
   }
}
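// Example (illustrative, not from the original file): a TrainingStrategy string with two phases,
// separated by '|', is parsed into two TTrainingSettings entries, e.g.
//
//    TrainingStrategy=LearningRate=1e-2,BatchSize=256,ConvergenceSteps=20,Optimizer=ADAM|
//                     LearningRate=1e-3,BatchSize=256,ConvergenceSteps=20,Optimizer=ADAM
//
// Keys missing from a block fall back to the defaults used in the fetchValueTmp calls above.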
////////////////////////////////////////////////////////////////////////////////
/// Default initializations.
void MethodDL::Init()
{
   // Nothing to do here
}

////////////////////////////////////////////////////////////////////////////////
/// Parse the input layout string.
void MethodDL::ParseInputLayout()
{
   // Define the delimiter
   const TString delim("|");

   // Get the input layout string
   TString inputLayoutString = this->GetInputLayoutString();

   // Split the input layout string into its dimensions
   TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
   TIter nextInputDim(inputDimStrings);
   TObjString *inputDimString = (TObjString *)nextInputDim();

   // the first element of the shape is the batch size (placeholder value here)
   std::vector<size_t> inputShape;
   inputShape.reserve(inputLayoutString.Length() / 2 + 2);
   inputShape.push_back(30);
   for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
      size_t subDim = (size_t)abs(inputDimString->GetString().Atoi());
      inputShape.push_back(subDim);
   }

   this->SetInputShape(inputShape);
}
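// Example (illustrative): "InputLayout=3|32|32" describes an input of depth 3, height 32 and
// width 32 (e.g. a 32x32 RGB image), while "InputLayout=1|1|8" describes a flat vector of
// 8 input variables.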
////////////////////////////////////////////////////////////////////////////////
/// Parse the batch layout string (depth | height | width).
void MethodDL::ParseBatchLayout()
{
   // Define the delimiter
   const TString delim("|");

   // Get the batch layout string
   TString batchLayoutString = this->GetBatchLayoutString();

   size_t batchDepth = 0;
   size_t batchHeight = 0;
   size_t batchWidth = 0;

   // Split the batch layout string
   TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
   TIter nextBatchDim(batchDimStrings);
   TObjString *batchDimString = (TObjString *)nextBatchDim();
   int idxToken = 0;

   for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
      switch (idxToken) {
      case 0: { TString strDepth(batchDimString->GetString());  batchDepth  = (size_t)strDepth.Atoi();  } break;
      case 1: { TString strHeight(batchDimString->GetString()); batchHeight = (size_t)strHeight.Atoi(); } break;
      case 2: { TString strWidth(batchDimString->GetString());  batchWidth  = (size_t)strWidth.Atoi();  } break;
      }
      ++idxToken;
   }

   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);
}
////////////////////////////////////////////////////////////////////////////////
/// Create a deep net based on the layout string.
template <typename Architecture_t, typename Layer_t>
void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
{
   // Layer specification, layer details
   const TString layerDelimiter(",");
   const TString subDelimiter("|");

   TString layoutString = this->GetLayoutString();

   // Split layers
   TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
   TIter nextLayer(layerStrings);
   TObjString *layerString = (TObjString *)nextLayer();

   for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {

      // Split layer details
      TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      // Determine the type of the layer from the first token
      TString strLayerType = token->GetString();

      if (strLayerType == "DENSE") {
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "CONV") {
         ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "MAXPOOL") {
         ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RESHAPE") {
         ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "BNORM") {
         ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else if (strLayerType == "RNN") {
         ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      } else {
         // no layer type specified: assume a dense layer
         ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
      }
   }
}
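// Example (illustrative): a layout string for a small convolutional network,
//
//    Layout=CONV|8|3|3|1|1|1|1|RELU,MAXPOOL|2|2|1|1,RESHAPE|FLAT,DENSE|64|RELU,DENSE|1|LINEAR
//
// is split at ',' into five layer strings; each layer string is then dispatched to the
// corresponding Parse*Layer method according to its first '|'-separated token.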
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate dense layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                               std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                               TString delim)
{
   int width = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   // number of input variables of the method;
   // used to deduce the layer width when it is given as a formula such as N+10
   const size_t inputSize = GetNvar();

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();

   // loop on the tokens: the order of width and activation function is not relevant,
   // both 100|TANH and TANH|100 are valid
   for (; token != nullptr; token = (TObjString *)nextToken()) {
      TString strActFnc(token->GetString());
      // if the token is the layer type, skip it
      if (strActFnc == "DENSE") continue;

      if (strActFnc == "RELU") {
         activationFunction = DNN::EActivationFunction::kRelu;
      } else if (strActFnc == "TANH") {
         activationFunction = DNN::EActivationFunction::kTanh;
      } else if (strActFnc == "SYMMRELU") {
         activationFunction = DNN::EActivationFunction::kSymmRelu;
      } else if (strActFnc == "SOFTSIGN") {
         activationFunction = DNN::EActivationFunction::kSoftSign;
      } else if (strActFnc == "SIGMOID") {
         activationFunction = DNN::EActivationFunction::kSigmoid;
      } else if (strActFnc == "LINEAR") {
         activationFunction = DNN::EActivationFunction::kIdentity;
      } else if (strActFnc == "GAUSS") {
         activationFunction = DNN::EActivationFunction::kGauss;
      } else if (width == 0) {
         // no activation function matched: the token defines the layer width,
         // possibly as a formula in N (the number of input variables)
         TString strNumNodes = strActFnc;
         TString strN("x");
         strNumNodes.ReplaceAll("N", strN);
         strNumNodes.ReplaceAll("n", strN);
         TFormula fml("tmp", strNumNodes);
         width = fml.Eval(inputSize);
      }
   }
   // a zero width is an invalid network configuration
   if (width == 0) width = 1;

   // Add the dense layer and initialize its weights and biases
   TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
   denseLayer->Initialize();

   // add the same layer to fNet
   if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
}
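// Example (illustrative): with 10 input variables (N = 10) the width token "(N+2)*2" is
// rewritten to "(x+2)*2" and evaluated with TFormula::Eval(10), giving a dense layer of 24 nodes.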
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate convolutional layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                              std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                              TString delim)
{
   int depth = 0;
   int fltHeight = 0;
   int fltWidth = 0;
   int strideRows = 0;
   int strideCols = 0;
   int zeroPadHeight = 0;
   int zeroPadWidth = 0;
   EActivationFunction activationFunction = EActivationFunction::kTanh;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: depth         = TString(token->GetString()).Atoi(); break; // number of filters
      case 2: fltHeight     = TString(token->GetString()).Atoi(); break; // filter height
      case 3: fltWidth      = TString(token->GetString()).Atoi(); break; // filter width
      case 4: strideRows    = TString(token->GetString()).Atoi(); break; // stride in rows
      case 5: strideCols    = TString(token->GetString()).Atoi(); break; // stride in cols
      case 6: zeroPadHeight = TString(token->GetString()).Atoi(); break; // zero padding height
      case 7: zeroPadWidth  = TString(token->GetString()).Atoi(); break; // zero padding width
      case 8: { // activation function
         TString strActFnc(token->GetString());
         if (strActFnc == "RELU") {
            activationFunction = DNN::EActivationFunction::kRelu;
         } else if (strActFnc == "TANH") {
            activationFunction = DNN::EActivationFunction::kTanh;
         } else if (strActFnc == "SYMMRELU") {
            activationFunction = DNN::EActivationFunction::kSymmRelu;
         } else if (strActFnc == "SOFTSIGN") {
            activationFunction = DNN::EActivationFunction::kSoftSign;
         } else if (strActFnc == "SIGMOID") {
            activationFunction = DNN::EActivationFunction::kSigmoid;
         } else if (strActFnc == "LINEAR") {
            activationFunction = DNN::EActivationFunction::kIdentity;
         } else if (strActFnc == "GAUSS") {
            activationFunction = DNN::EActivationFunction::kGauss;
         }
      } break;
      }
      ++idxToken;
   }

   // Add the convolutional layer and initialize its weights and biases
   TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                                                zeroPadHeight, zeroPadWidth, activationFunction);
   convLayer->Initialize();

   // add the same layer to fNet
   if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                                     zeroPadHeight, zeroPadWidth, activationFunction);
}
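// Example (illustrative): the layer string "CONV|16|3|3|1|1|0|0|RELU" creates a convolutional
// layer with 16 filters of size 3x3, stride 1x1, no zero padding and a ReLU activation.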
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate max-pooling layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                                 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                                 TString delim)
{
   int filterHeight = 0;
   int filterWidth = 0;
   int strideRows = 0;
   int strideCols = 0;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: filterHeight = TString(token->GetString()).Atoi(); break; // filter height
      case 2: filterWidth  = TString(token->GetString()).Atoi(); break; // filter width
      case 3: strideRows   = TString(token->GetString()).Atoi(); break; // stride in rows
      case 4: strideCols   = TString(token->GetString()).Atoi(); break; // stride in cols
      }
      ++idxToken;
   }

   // Add the max pooling layer
   deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);

   // add the same layer to fNet
   if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
}
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate reshape layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                                 std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                                 TString delim)
{
   int depth = 0;
   int height = 0;
   int width = 0;
   bool flattening = false;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      // "RESHAPE|FLAT" is a valid shortcut: jump directly to the flattening token
      if (token->GetString() == "FLAT") idxToken = 4;
      switch (idxToken) {
      case 1: depth  = TString(token->GetString()).Atoi(); break;
      case 2: height = TString(token->GetString()).Atoi(); break;
      case 3: width  = TString(token->GetString()).Atoi(); break;
      case 4: {
         TString flat(token->GetString());
         if (flat == "FLAT") flattening = true;
      } break;
      }
      ++idxToken;
   }

   // Add the reshape layer
   deepNet.AddReshapeLayer(depth, height, width, flattening);

   // add the same layer to fNet
   if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
}
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate batch-normalization layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseBatchNormLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                                   std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                                   TString delim)
{
   // layer parameters
   double momentum = -1; // default: compute the running average during training
   double epsilon = 0.0001;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: momentum = std::atof(token->GetString().Data()); break;
      case 2: epsilon  = std::atof(token->GetString().Data()); break;
      }
      ++idxToken;
   }

   // Add the batch normalization layer
   auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
   layer->Initialize();

   // add the same layer to fNet
   if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
}
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string and create the appropriate recurrent (RNN) layer.
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
                             std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                             TString delim)
{
   int stateSize = 0;
   int inputSize = 0;
   int timeSteps = 0;
   bool rememberState = false;

   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();
   int idxToken = 0;

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      switch (idxToken) {
      case 1: stateSize     = TString(token->GetString()).Atoi(); break;       // state size
      case 2: inputSize     = TString(token->GetString()).Atoi(); break;       // input size
      case 3: timeSteps     = TString(token->GetString()).Atoi(); break;       // time steps
      case 4: rememberState = (bool)TString(token->GetString()).Atoi(); break; // remember state
      }
      ++idxToken;
   }

   // Add the recurrent layer and initialize its weights and biases
   TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
                                                                            timeSteps, rememberState);
   basicRNNLayer->Initialize();

   // add the same layer to fNet
   if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
}
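// Example (illustrative): the layer string "RNN|10|8|5|0" creates a vanilla recurrent layer
// with state size 10, input size 8, 5 time steps and rememberState disabled.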
////////////////////////////////////////////////////////////////////////////////
/// Parse the layer string for an LSTM layer (not implemented in this version).
template <typename Architecture_t, typename Layer_t>
void MethodDL::ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> &,
                              std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &, TString layerString,
                              TString delim)
{
   // Split layer details
   TObjArray *subStrings = layerString.Tokenize(delim);
   TIter nextToken(subStrings);
   TObjString *token = (TObjString *)nextToken();

   for (; token != nullptr; token = (TObjString *)nextToken()) {
      // token parsing for the LSTM layer is not implemented here
   }
}
////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData,
                   const TString &theOption)
   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4, 0),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
     fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
     fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here
}

////////////////////////////////////////////////////////////////////////////////
/// Constructor from a weight file.
MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4, 0), fBatchHeight(),
     fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
     fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
     fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here
}

////////////////////////////////////////////////////////////////////////////////
/// Destructor.
MethodDL::~MethodDL()
{
}
////////////////////////////////////////////////////////////////////////////////
/// Parse key-value pairs in blocks, e.g. "LearningRate=1e-3,BatchSize=256|LearningRate=1e-4".
auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
{
   // remove all spaces
   parseString.ReplaceAll(" ", "");
   KeyValueVector_t blockKeyValues;
   const TString keyValueDelim("=");

   TObjArray *blockStrings = parseString.Tokenize(blockDelim);
   TIter nextBlock(blockStrings);
   TObjString *blockString = (TObjString *)nextBlock();

   for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
      blockKeyValues.push_back(std::map<TString, TString>());
      std::map<TString, TString> &currentBlock = blockKeyValues.back();

      TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
      TIter nextToken(subStrings);
      TObjString *token = (TObjString *)nextToken();

      for (; token != nullptr; token = (TObjString *)nextToken()) {
         TString strKeyValue(token->GetString());
         int delimPos = strKeyValue.First(keyValueDelim.Data());
         if (delimPos <= 0) continue;

         TString strKey = TString(strKeyValue(0, delimPos));
         TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));

         strKey.Strip(TString::kBoth, ' ');
         strValue.Strip(TString::kBoth, ' ');

         currentBlock.insert(std::make_pair(strKey, strValue));
      }
   }
   return blockKeyValues;
}
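// Example (illustrative):
//    ParseKeyValueString("LearningRate=1e-3,BatchSize=128|LearningRate=1e-4,BatchSize=256", "|", ",")
// returns a vector of two maps,
//    { {"LearningRate","1e-3"}, {"BatchSize","128"} } and { {"LearningRate","1e-4"}, {"BatchSize","256"} }.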
////////////////////////////////////////////////////////////////////////////////
/// Analysis types handled by this method.
Bool_t MethodDL::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass) return kTRUE;
   if (type == Types::kRegression) return kTRUE;

   return kFALSE;
}
////////////////////////////////////////////////////////////////////////////////
/// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and 100
/// (the latter providing an absolute number of events).
UInt_t TMVA::MethodDL::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   // Parsing + Validation
   // --------------------
   if (fNumValidationString.EndsWith("%")) {
      // Relative spec. format 20%
      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));

      if (intValStr.IsFloat()) {
         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();

      if (valSizeAsDouble < 1.0) {
         // Relative spec. format 0.2
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         // Absolute spec. format 100 or 100.0
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
            << "\". Expected string like \"0.2\" or \"100\"." << Endl;
   }

   // Value validation
   // ----------------
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
   }

   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
   }

   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
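// Example (illustrative): with 10000 training events, ValidationSize="20%" or "0.2" reserves
// 2000 events for validation, while ValidationSize="100" reserves exactly 100 events.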
////////////////////////////////////////////////////////////////////////////////
/// Implementation of the architecture-specific train method.
template <typename Architecture_t>
void MethodDL::TrainDeepNet()
{
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Layer_t = TMVA::DNN::VGeneralLayer<Architecture_t>;
   using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t, Layer_t>;
   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;

   bool debug = Log().GetMinType() == kDEBUG;

   // set the random seed used for weight initialization and batch shuffling
   Architecture_t::SetRandomSeed(fRandomSeed);

   // split the training data into a training and a validation sample
   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;

   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};

   size_t trainingPhase = 1;

   for (TTrainingSettings &settings : this->GetTrainingSettings()) {

      size_t nThreads = 1; // the number of learning threads is currently fixed to 1

      // After processing the options, initialize the master deep net
      size_t batchSize = settings.batchSize;
      this->SetBatchSize(batchSize);
      size_t inputDepth = this->GetInputDepth();
      size_t inputHeight = this->GetInputHeight();
      size_t inputWidth = this->GetInputWidth();
      size_t batchDepth = this->GetBatchDepth();
      size_t batchHeight = this->GetBatchHeight();
      size_t batchWidth = this->GetBatchWidth();
      ELossFunction J = this->GetLossFunction();
      EInitialization I = this->GetWeightInitialization();
      ERegularization R = settings.regularization;
      EOptimizer O = settings.optimizer;
      Scalar_t weightDecay = settings.weightDecay;

      // check consistency of the batch layout with the batch size
      if (batchDepth != batchSize && batchDepth > 1) {
         Error("Train", "Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",
               batchDepth, batchSize);
         return;
      }
      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight) {
         Error("Train", "Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",
               batchHeight, batchSize);
         return;
      }

      // check also that the input layout is compatible with the batch layout
      bool badLayout = false;
      // case batchDepth = batchSize
      if (batchDepth == batchSize)
         badLayout = (inputDepth * inputHeight * inputWidth != batchHeight * batchWidth);
      // case batchHeight = batchSize
      if (batchHeight == batchSize && batchDepth == 1)
         badLayout |= (inputDepth * inputHeight * inputWidth != batchWidth);
      if (badLayout) {
         Error("Train", "Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
               inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth);
         return;
      }

      // check that the batch size is compatible with the number of events
      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
         Log() << kFATAL << "Number of samples in the datasets are train: ("
               << nTrainingSamples << ") test: (" << nValidationSamples
               << "). One of these is smaller than the batch size of "
               << settings.batchSize << ". Please increase the batch"
               << " size to be at least the same size as the smallest"
               << " of them." << Endl;
      }

      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R,
                        weightDecay);

      // create the evaluation network (fNet, with batch size 1) only in the first training phase
      if (trainingPhase == 1) {
         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
                                                                 batchHeight, batchWidth, J, I, R, weightDecay));
         fBuildNet = true;
      } else {
         fBuildNet = false;
      }

      // Initialize the vector of slave nets
      std::vector<DeepNet_t> nets{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         // create a copy of the master deep net
         nets.push_back(deepNet);
      }

      // Add all appropriate layers to deepNet (and, if fBuildNet, also to fNet)
      CreateDeepNet(deepNet, nets);

      // set dropout probabilities (stored in the layers as 1 - dropout probability)
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto &p : dropoutVector) {
         p = 1.0 - p;
      }
      deepNet.SetDropoutProbabilities(dropoutVector);

      if (trainingPhase > 1) {
         // copy the initial weights from fNet to deepNet
         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
         }
      }
      // create the tensor holding the input data of the network
      fXInput = ArchitectureImpl_t::CreateTensor(fNet->GetBatchSize(), GetInputDepth(), GetInputHeight(), GetInputWidth());
      if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
         fXInput = TensorImpl_t(fNet->GetBatchSize(), GetInputWidth());
      fXInputBuffer = HostBufferImpl_t(fXInput.GetSize());

      // create the matrix holding the network predictions
      fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth()));

      // print the created network
      Log() << "***** Deep Learning Network *****" << Endl;
      if (Log().GetMinType() <= kINFO)
         deepNet.Print();

      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;

      // Load the training and validation datasets
      TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
                                      {inputDepth, inputHeight, inputWidth},
                                      {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()},
                                      deepNet.GetOutputWidth(), nThreads);

      TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
      TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
                                        {inputDepth, inputHeight, inputWidth},
                                        {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()},
                                        deepNet.GetOutputWidth(), nThreads);

      // evaluate the network to obtain the initial minimum validation error
      Bool_t includeRegularization = (R != DNN::ERegularization::kNone);

      Double_t minValError = 0.0;
      Log() << "Compute initial loss on the validation data " << Endl;
      for (auto batch : validationData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();

         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
      }
      // normalize by the number of batches and add the regularization term
      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
      minValError += regzTerm;
      // create the optimizer selected by the user
      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;
      switch (O) {
      case EOptimizer::kSGD:
         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
         break;
      case EOptimizer::kAdam:
         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;
      case EOptimizer::kAdagrad:
         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;
      case EOptimizer::kRMSProp:
         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
         break;
      case EOptimizer::kAdadelta:
         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;
      }
      std::vector<TTensorBatch<Architecture_t>> batches{};

      bool converged = false;
      size_t convergenceCount = 0;
      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();

      // start measuring
      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
      tstart = std::chrono::system_clock::now();

      Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
            << " Optimizer " << settings.optimizerName
            << " Learning rate = " << settings.learningRate
            << " regularization " << (char)settings.regularization
            << " minimum error = " << minValError << Endl;
      if (!fInteractive) {
         std::string separator(62, '-');
         Log() << separator << Endl;
         Log() << std::setw(10) << "Epoch"
               << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
               << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
               << std::setw(12) << "nEvents/s"
               << std::setw(12) << "Conv. Steps" << Endl;
         Log() << separator << Endl;
      }

      // set up the generator for shuffling the batches;
      // if the seed is zero the batch order differs on every run
      size_t shuffleSeed = 0;
      if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
      RandomGenerator<TRandom3> rng(shuffleSeed);

      // print the initial network weights (debug only)
      if (fBuildNet && debug) {
         Log() << "Initial Deep Net Weights " << Endl;
         auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
         for (size_t l = 0; l < weights_tensor.size(); ++l)
            weights_tensor[l].Print();
         auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
         bias_tensor[0].Print();
      }

      Log() << " Start epoch iteration ..." << Endl;
      bool debugFirstEpoch = false;
      bool computeLossInTraining = true; // compute the training loss during the batch loop
      while (!converged) {
         optimizer->IncrementGlobalStep();
         trainingData.Shuffle(rng);

         // execute all epoch steps (batches); the training error is the average loss over the batches
         Double_t trainingError = 0;
         for (size_t i = 0; i < batchesInEpoch; ++i) {

            if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";

            auto my_batch = trainingData.GetTensorBatch();

            if (debugFirstEpoch)
               std::cout << "got batch data - doing forward \n";

            size_t nlayers = deepNet.GetLayers().size();

            if (debugFirstEpoch) {
               Architecture_t::PrintTensor(my_batch.GetInput(), "input tensor", true);
               typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
               typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
               Architecture_t::PrintTensor(tOut, "label tensor", true);
               Architecture_t::PrintTensor(tW, "weight tensor", true);
            }

            deepNet.Forward(my_batch.GetInput(), true);
            // accumulate the loss on the training batch
            if (computeLossInTraining) {
               auto outputMatrix = my_batch.GetOutput();
               auto weights = my_batch.GetWeights();
               trainingError += deepNet.Loss(outputMatrix, weights, false);
            }

            if (debugFirstEpoch)
               std::cout << "- doing backward \n";

            if (debugFirstEpoch) {
               for (size_t l = 0; l < nlayers; ++l) {
                  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
                     Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
                                                 TString::Format("initial weights layer %d", l).Data());

                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
                                              TString::Format("output tensor layer %d", l).Data());
               }
            }

            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());

            if (debugFirstEpoch)
               std::cout << "- doing optimizer update \n";

            // update the network weights
            optimizer->Step();

            if (debugFirstEpoch) {
               std::cout << "minimizer step - momentum " << settings.momentum << " learning rate "
                         << optimizer->GetLearningRate() << std::endl;
               for (size_t l = 0; l < nlayers; ++l) {
                  if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
                     Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
                                                 TString::Format("weights after step layer %d", l).Data());
                     Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0), "weight gradients");
                  }
               }
            }
         }

         if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss   \n";
         debugFirstEpoch = false;
         // compute the validation error every testInterval epochs
         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {

            std::chrono::time_point<std::chrono::system_clock> t1, t2;

            t1 = std::chrono::system_clock::now();

            // Compute the validation error
            Double_t valError = 0.0;
            bool inTraining = false;
            for (auto batch : validationData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
            }
            // normalize by the number of batches and add the regularization term
            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
            valError /= (Double_t)(nValidationSamples / settings.batchSize);
            valError += regTerm;

            // update the history of the validation error
            fTrainHistory.AddValue("valError", optimizer->GetGlobalStep(), valError);

            t2 = std::chrono::system_clock::now();

            // check for convergence
            if (valError < minValError) {
               convergenceCount = 0;
            } else {
               convergenceCount += settings.testInterval;
            }

            // copy the weights of the best network found so far into fNet
            if (valError < minValError) {
               Log() << std::setw(10) << optimizer->GetGlobalStep()
                     << " Minimum Test error found - save the configuration " << Endl;
               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
                  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
               }
               minValError = valError;
            } else if (minValError <= 0.) {
               minValError = valError;
            }

            // compute the training error if it was not accumulated during the batch loop
            if (!computeLossInTraining) {
               trainingError = 0.0;
               for (auto batch : trainingData) {
                  auto inputTensor = batch.GetInput();
                  auto outputMatrix = batch.GetOutput();
                  auto weights = batch.GetWeights();
                  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
               }
            }
            // normalize the training error
            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
            trainingError += regTerm;

            // update the history of the training error
            fTrainHistory.AddValue("trainingError", optimizer->GetGlobalStep(), trainingError);
            // stop measuring
            tend = std::chrono::system_clock::now();

            // compute the elapsed times
            std::chrono::duration<double> elapsed_seconds = tend - tstart;
            std::chrono::duration<double> elapsed1 = t1 - tstart;
            // time spent computing the validation loss
            std::chrono::duration<double> elapsed_testing = tend - t1;

            double seconds = elapsed_seconds.count();
            // average time per event in the training sample
            double eventTime = elapsed1.count() / (batchesInEpoch * settings.testInterval * settings.batchSize);

            converged =
               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;

            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << valError
                  << std::setw(12) << seconds / settings.testInterval
                  << std::setw(12) << elapsed_testing.count()
                  << std::setw(12) << 1. / eventTime
                  << std::setw(12) << convergenceCount << Endl;

            // restart the clock for the next test interval
            tstart = std::chrono::system_clock::now();
         }

         // print the final network weights for this phase (debug only)
         if (converged && debug) {
            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
                  << Endl;
            auto &weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
            auto &bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
            for (size_t l = 0; l < weights_tensor.size(); ++l)
               weights_tensor[l].Print();
            bias_tensor[0].Print();
         }
      } // end of the epoch (while) loop

      trainingPhase++;
   } // end of the loop over the training settings
}
////////////////////////////////////////////////////////////////////////////////
void MethodDL::Train()
{
   if (fInteractive) {
      Log() << kFATAL << "Not implemented yet" << Endl;
      return;
   }

   // use for training the same scalar type defined for the prediction
   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
#ifdef R__HAS_CUDNN
      TrainDeepNet<DNN::TCudnn<ScalarImpl_t>>();
#else
      TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
#endif
#else
      Log() << kFATAL << "CUDA backend not enabled. Please make sure "
                         "you have CUDA installed and it was successfully "
                         "detected by CMAKE." << Endl;
      return;
#endif
   } else if (this->GetArchitectureString() == "OPENCL") {
      Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
      return;
   } else if (this->GetArchitectureString() == "CPU") {
#ifdef R__HAS_TMVACPU
      Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "
            << gConfig().GetNCpu() << Endl << Endl;
      TrainDeepNet<DNN::TCpu<ScalarImpl_t>>();
#else
      Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
                         "you have a BLAS implementation and it was successfully "
                         "detected by CMake as well that the imt CMake flag is set." << Endl;
      return;
#endif
   } else if (this->GetArchitectureString() == "STANDARD") {
      Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
      TrainDeepNet<DNN::TReference<ScalarImpl_t>>();
   } else {
      Log() << kFATAL << this->GetArchitectureString()
            << " is not a supported architecture for TMVA::MethodDL" << Endl;
   }
}
////////////////////////////////////////////////////////////////////////////////
Double_t MethodDL::GetMvaValue(Double_t *, Double_t *)
{
   // fNet should have been built with a batch size of 1
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
   }

   R__ASSERT(fNet->GetBatchSize() == 1);

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();

   size_t nVariables = GetEvent()->GetNVariables();

   // for a column-major tensor (e.g. CPU) the first dimension is the number of channels/variables
   if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
      R__ASSERT(fXInput.GetShape().size() < 4);
      size_t nc, nhw = 0;
      if (fXInput.GetShape().size() == 2) {
         nc = fXInput.GetShape()[0];
         if (nc != 1) {
            ArchitectureImpl_t::PrintTensor(fXInput);
            Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1" << Endl;
         }
         nhw = fXInput.GetShape()[1];
      } else {
         nc = fXInput.GetCSize();
         nhw = fXInput.GetWSize();
      }
      if (nVariables != nc * nhw) {
         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw << Endl;
      }
      for (size_t j = 0; j < nc; j++) {
         for (size_t k = 0; k < nhw; k++) {
            // event values are stored channel-major; the column-major buffer expects the channel index to run fastest
            fXInputBuffer[k * nc + j] = inputValues[j * nhw + k];
         }
      }
   } else {
      // row-major tensor (e.g. GPU): the values can be copied in the same order
      assert(fXInput.GetShape().size() >= 4);
      size_t nc = fXInput.GetCSize();
      size_t nh = fXInput.GetHSize();
      size_t nw = fXInput.GetWSize();
      size_t n = nc * nh * nw;
      if (nVariables != n) {
         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
               << Endl;
      }
      for (size_t j = 0; j < n; j++) {
         fXInputBuffer[j] = inputValues[j];
      }
   }
   // copy the buffer to the device
   fXInput.GetDeviceBuffer().CopyFrom(fXInputBuffer);

   // run the prediction
   fNet->Prediction(*fYHat, fXInput, fOutputFunction);

   // the MVA value is the network output for the single event in the batch
   double mvaValue = (*fYHat)(0, 0);
#ifdef DEBUG_MVAVALUE
   using Tensor_t = std::vector<MatrixImpl_t>;
   TMatrixF xInput(n1, n2, inputValues.data());
   std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
   std::cout << "Output of DeepNet " << mvaValue << std::endl;
   auto &deepnet = *fNet;
   std::cout << "Loop on layers " << std::endl;
   for (int l = 0; l < deepnet.GetDepth(); ++l) {
      std::cout << "Layer " << l;
      const auto *layer = deepnet.GetLayerAt(l);
      const Tensor_t &layer_output = layer->GetOutput();
      std::cout << "DNN output " << layer_output.size() << std::endl;
      for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
         TMatrixD m = layer_output[i];
#else
         TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols(), layer_output[i].GetRawDataPointer());
#endif
         m.Print();
      }
      const Tensor_t &layer_weights = layer->GetWeights();
      std::cout << "DNN weights " << layer_weights.size() << std::endl;
      if (layer_weights.size() > 0) {
         int i = 0;
#ifdef R__HAS_TMVAGPU
         TMatrixD m = layer_weights[i];
#else
         TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols(), layer_weights[i].GetRawDataPointer());
#endif
         m.Print();
      }
   }
#endif

   return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
}
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the deep net on a range of events using batches.
template <typename Architecture_t>
std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
{
   // Check whether the model is set up
   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built" << Endl;
   }

   // rebuild the network with the requested batch size
   this->SetBatchSize(batchSize);
   size_t inputDepth = this->GetInputDepth();
   size_t inputHeight = this->GetInputHeight();
   size_t inputWidth = this->GetInputWidth();
   size_t batchDepth = this->GetBatchDepth();
   size_t batchHeight = this->GetBatchHeight();
   size_t batchWidth = this->GetBatchWidth();
   ELossFunction J = fNet->GetLossFunction();
   EInitialization I = fNet->GetInitialization();
   ERegularization R = fNet->GetRegularization();
   Double_t weightDecay = fNet->GetWeightDecay();

   using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;

   // create the deep neural network (layers are added only to the local net, not to fNet)
   DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
   std::vector<DeepNet_t> nets{};
   fBuildNet = false;
   CreateDeepNet(deepNet, nets);

   // copy the trained weights from fNet into the newly built network
   for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
      deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
   }

   size_t n1 = deepNet.GetBatchHeight();
   size_t n2 = deepNet.GetBatchWidth();
   size_t n0 = deepNet.GetBatchSize();
   // dense network with a 1D input: the batch is a 2D tensor
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
      n1 = deepNet.GetBatchSize();
      n0 = 1;
   }
   // use a tensor data loader to load the data for evaluation
   Long64_t nEvents = lastEvt - firstEvt;
   TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
   TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2},
                               deepNet.GetOutputWidth(), 1);

   // matrix holding the network output for one batch
   Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth());

   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // event loop
   std::vector<double> mvaValues(nEvents);

   for (Long64_t ievt = firstEvt; ievt < lastEvt; ievt += batchSize) {

      Long64_t ievt_end = ievt + batchSize;
      // batch prediction for a full batch
      if (ievt_end <= lastEvt) {

         if (ievt == firstEvt) {
            Data()->SetCurrentEvent(ievt);
            size_t nVariables = GetEvent()->GetNVariables();

            if (n1 == batchSize && n0 == 1) {
               if (n2 != nVariables) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
                        << Endl;
               }
            } else {
               // tensor case
               if (n1 * n2 != nVariables || n0 != batchSize) {
                  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
                        << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1
                        << " x " << n2 << Endl;
               }
            }
         }

         auto batch = testData.GetTensorBatch();
         auto inputTensor = batch.GetInput();

         auto xInput = batch.GetInput();
         // run the prediction on the batch
         deepNet.Prediction(yHat, xInput, fOutputFunction);
         for (size_t i = 0; i < batchSize; ++i) {
            double value = yHat(i, 0);
            mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
         }
      } else {
         // evaluate the remaining events one by one
         for (Long64_t i = ievt; i < lastEvt; ++i) {
            Data()->SetCurrentEvent(i);
            mvaValues[i] = GetMvaValue();
         }
      }
   }

   if (logProgress) {
      Log() << kINFO << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << "  " << Endl;
   }

   return mvaValues;
}
////////////////////////////////////////////////////////////////////////////////
const std::vector<Float_t> &TMVA::MethodDL::GetRegressionValues()
{
   size_t nVariables = GetEvent()->GetNVariables();
   MatrixImpl_t X(1, nVariables);
   TensorImpl_t X_vec(1, 1, nVariables);
   const Event *ev = GetEvent();
   const std::vector<Float_t> &inputValues = ev->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X_vec(0, i, 0) = inputValues[i];
   }

   size_t nTargets = std::max(1u, ev->GetNTargets());
   MatrixImpl_t YHat(1, nTargets);
   std::vector<Float_t> output(nTargets);
   fNet->Prediction(YHat, X_vec, fOutputFunction);

   for (size_t i = 0; i < nTargets; i++)
      output[i] = YHat(0, i);

   if (fRegressionReturnVal == NULL) {
      fRegressionReturnVal = new std::vector<Float_t>();
   }
   fRegressionReturnVal->clear();

   // apply the inverse transformation to the targets
   Event *evT = new Event(*ev);
   for (size_t i = 0; i < nTargets; ++i) {
      evT->SetTarget(i, output[i]);
   }

   const Event *evT2 = GetTransformationHandler().InverseTransform(evT);
   for (size_t i = 0; i < nTargets; ++i) {
      fRegressionReturnVal->push_back(evT2->GetTarget(i));
   }
   delete evT;
   return *fRegressionReturnVal;
}
////////////////////////////////////////////////////////////////////////////////
const std::vector<Float_t> &TMVA::MethodDL::GetMulticlassValues()
{
   size_t nVariables = GetEvent()->GetNVariables();
   MatrixImpl_t X(1, nVariables);
   TensorImpl_t X_vec(1, 1, nVariables);
   MatrixImpl_t YHat(1, DataInfo().GetNClasses());
   if (fMulticlassReturnVal == NULL) {
      fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
   }

   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();
   for (size_t i = 0; i < nVariables; i++) {
      X_vec(0, i, 0) = inputValues[i];
   }

   fNet->Prediction(YHat, X_vec, fOutputFunction);
   for (size_t i = 0; i < (size_t)YHat.GetNcols(); i++) {
      (*fMulticlassReturnVal)[i] = YHat(0, i);
   }
   return *fMulticlassReturnVal;
}
////////////////////////////////////////////////////////////////////////////////
/// Evaluate the deep net on a vector of input values stored in the TMVA Event class.
std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use the same batch size as for training (from the first training strategy)
   size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
   size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
   if (size_t(nEvents) < batchSize) batchSize = nEvents;

   // use for evaluation the same scalar type defined for the prediction
   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
#ifdef R__HAS_CUDNN
      return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#else
      return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif
#endif
   } else if (this->GetArchitectureString() == "CPU") {
      Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
      return PredictDeepNet<DNN::TCpu<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
   }
   Log() << kINFO << "ERROR: STANDARD architecture is not supported anymore for MethodDL ! " << Endl;

   return std::vector<Double_t>(nEvents, TMath::QuietNaN());
}
////////////////////////////////////////////////////////////////////////////////
void MethodDL::AddWeightsXMLTo(void *parent) const
{
   // Create the parent XML node with name "Weights"
   auto &xmlEngine = gTools().xmlengine();
   void *nn = xmlEngine.NewChild(parent, 0, "Weights");

   /*! Store all the information needed to reconstruct the net
    *  when reading the same XML file back. */

   // Deep Net specific info
   Int_t depth = fNet->GetDepth();

   Int_t inputDepth = fNet->GetInputDepth();
   Int_t inputHeight = fNet->GetInputHeight();
   Int_t inputWidth = fNet->GetInputWidth();

   Int_t batchSize = fNet->GetBatchSize();

   Int_t batchDepth = fNet->GetBatchDepth();
   Int_t batchHeight = fNet->GetBatchHeight();
   Int_t batchWidth = fNet->GetBatchWidth();

   char lossFunction = static_cast<char>(fNet->GetLossFunction());
   char initialization = static_cast<char>(fNet->GetInitialization());
   char regularization = static_cast<char>(fNet->GetRegularization());

   Double_t weightDecay = fNet->GetWeightDecay();

   // Method specific info
   char outputFunction = static_cast<char>(this->GetOutputFunction());

   // Add attributes to the parent node
   xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));

   xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
   xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
   xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));

   xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
   xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
   xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
   xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));

   xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
   xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
   xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
   xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));

   gTools().AddAttr(nn, "WeightDecay", weightDecay);

   // Add one child node per layer
   for (Int_t i = 0; i < depth; i++)
      fNet->GetLayerAt(i)->AddWeightsXMLTo(nn);
}
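// Illustrative sketch of the resulting XML (attribute values depend on the trained network;
// the single-character codes come from the casts of the enum values above):
//
//    <Weights NetDepth="3" InputDepth="1" InputHeight="1" InputWidth="8"
//             BatchSize="1" BatchDepth="1" BatchHeight="1" BatchWidth="8"
//             LossFunction="C" Initialization="G" Regularization="N"
//             OutputFunction="S" WeightDecay="0">
//       <!-- one child node per layer, written by each layer's AddWeightsXMLTo -->
//    </Weights>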
////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromXML(void *rootXML)
{
   auto netXML = gTools().GetChild(rootXML, "Weights");

   size_t netDepth;
   gTools().ReadAttr(netXML, "NetDepth", netDepth);

   size_t inputDepth, inputHeight, inputWidth;
   gTools().ReadAttr(netXML, "InputDepth", inputDepth);
   gTools().ReadAttr(netXML, "InputHeight", inputHeight);
   gTools().ReadAttr(netXML, "InputWidth", inputWidth);

   size_t batchSize, batchDepth, batchHeight, batchWidth;
   gTools().ReadAttr(netXML, "BatchSize", batchSize);
   gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
   gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
   gTools().ReadAttr(netXML, "BatchWidth", batchWidth);

   char lossFunctionChar;
   gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
   char initializationChar;
   gTools().ReadAttr(netXML, "Initialization", initializationChar);
   char regularizationChar;
   gTools().ReadAttr(netXML, "Regularization", regularizationChar);
   char outputFunctionChar;
   gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
   Double_t weightDecay;
   gTools().ReadAttr(netXML, "WeightDecay", weightDecay);

   // set the input and batch shapes
   this->SetInputDepth(inputDepth);
   this->SetInputHeight(inputHeight);
   this->SetInputWidth(inputWidth);
   this->SetBatchDepth(batchDepth);
   this->SetBatchHeight(batchHeight);
   this->SetBatchWidth(batchWidth);

   // create the deep net
   fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
                                                           batchHeight, batchWidth,
                                                           static_cast<ELossFunction>(lossFunctionChar),
                                                           static_cast<EInitialization>(initializationChar),
                                                           static_cast<ERegularization>(regularizationChar),
                                                           weightDecay));

   fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);

   // go through the layer nodes and add the layers to the network
   auto layerXML = gTools().xmlengine().GetChild(netXML);

   for (size_t i = 0; i < netDepth; i++) {

      TString layerName = gTools().xmlengine().GetNodeName(layerXML);

      // dense layer
      if (layerName == "DenseLayer") {

         // read the width and activation function and add the layer
         size_t width = 0;
         gTools().ReadAttr(layerXML, "Width", width);

         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());

         fNet->AddDenseLayer(width, func, 0.0); // no need to pass the dropout probability

      }
      // convolutional layer
      else if (layerName == "ConvLayer") {

         // read the depth, filter sizes, strides, paddings and activation function and add the layer
         size_t depth = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         size_t fltHeight, fltWidth = 0;
         size_t strideRows, strideCols = 0;
         size_t padHeight, padWidth = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);
         gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
         gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);

         TString funcString;
         gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
         EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());

         fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
                            padHeight, padWidth, actFunction);

      }
      // max-pooling layer
      else if (layerName == "MaxPoolLayer") {

         // read the pooling parameters
         size_t filterHeight, filterWidth = 0;
         size_t strideRows, strideCols = 0;
         gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
         gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
         gTools().ReadAttr(layerXML, "StrideRows", strideRows);
         gTools().ReadAttr(layerXML, "StrideCols", strideCols);

         fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
      }
      // reshape layer
      else if (layerName == "ReshapeLayer") {

         // read the reshape parameters
         size_t depth, height, width = 0;
         gTools().ReadAttr(layerXML, "Depth", depth);
         gTools().ReadAttr(layerXML, "Height", height);
         gTools().ReadAttr(layerXML, "Width", width);
         Bool_t flattening = kFALSE;
         gTools().ReadAttr(layerXML, "Flattening", flattening);

         fNet->AddReshapeLayer(depth, height, width, flattening);
      }
      // RNN layer
      else if (layerName == "RNNLayer") {

         // read the RNN parameters
         size_t stateSize, inputSize, timeSteps = 0;
         int rememberState = 0;
         gTools().ReadAttr(layerXML, "StateSize", stateSize);
         gTools().ReadAttr(layerXML, "InputSize", inputSize);
         gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
         gTools().ReadAttr(layerXML, "RememberState", rememberState);

         fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
      }
      // batch-normalization layer
      else if (layerName == "BatchNormLayer") {
         // use dummy values that are overwritten when reading the weights
         fNet->AddBatchNormLayer(0., 0.0);
      }

      // read the layer weights and biases
      fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);

      // read the next layer
      layerXML = gTools().GetNextChild(layerXML);
   }

   // create the input and output tensors used for the prediction;
   // the created tensor has the shape required by the backend (row- or column-major)
   fXInput = ArchitectureImpl_t::CreateTensor(fNet->GetBatchSize(), GetInputDepth(), GetInputHeight(), GetInputWidth());
   if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
      // use a column-major tensor since the prediction is done with a dense network and a 1D input
      fXInput = TensorImpl_t(fNet->GetBatchSize(), GetInputWidth(), TMVA::Experimental::MemoryLayout::ColumnMajor);
   fXInputBuffer = HostBufferImpl_t(fXInput.GetSize());

   // create the matrix used for the predictions
   fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth()));
}
////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromStream(std::istream &)
{
}

////////////////////////////////////////////////////////////////////////////////
const Ranking *TMVA::MethodDL::CreateRanking()
{
   return NULL;
}

////////////////////////////////////////////////////////////////////////////////
void MethodDL::GetHelpMessage() const
{
}