Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodDL.cxx
Go to the documentation of this file.
// @(#)root/tmva/tmva/cnn:$Id$
2 // Authors: Vladimir Ilievski, Lorenzo Moneta, Saurav Shekhar, Ravi Kiran
3 /**********************************************************************************
4  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5  * Package: TMVA *
6  * Class : MethodDL *
7  * Web : http://tmva.sourceforge.net *
8  * *
9  * Description: *
10  * Deep Neural Network Method *
11  * *
12  * Authors (alphabetical): *
13  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
14  * Saurav Shekhar <sauravshekhar01@gmail.com> - ETH Zurich, Switzerland *
15  * Ravi Kiran S <sravikiran0606@gmail.com> - CERN, Switzerland *
16  * *
17  * Copyright (c) 2005-2015: *
18  * CERN, Switzerland *
19  * U. of Victoria, Canada *
20  * MPI-K Heidelberg, Germany *
21  * U. of Bonn, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #include "TFormula.h"
29 #include "TString.h"
30 #include "TMath.h"
31 
32 #include "TMVA/Tools.h"
33 #include "TMVA/Configurable.h"
34 #include "TMVA/IMethod.h"
35 #include "TMVA/ClassifierFactory.h"
36 #include "TMVA/MethodDL.h"
37 #include "TMVA/Types.h"
39 #include "TMVA/DNN/Functions.h"
40 #include "TMVA/DNN/DLMinimizers.h"
41 #include "TMVA/DNN/SGD.h"
42 #include "TMVA/DNN/Adam.h"
43 #include "TMVA/DNN/Adagrad.h"
44 #include "TMVA/DNN/RMSProp.h"
45 #include "TMVA/DNN/Adadelta.h"
46 #include "TMVA/Timer.h"
47 
48 #include "TStopwatch.h"
49 
50 #include <chrono>
51 
52 REGISTER_METHOD(DL)
53 ClassImp(TMVA::MethodDL);
54 
55 using namespace TMVA::DNN::CNN;
56 using namespace TMVA::DNN;
57 
58 using TMVA::DNN::EActivationFunction;
59 using TMVA::DNN::ELossFunction;
60 using TMVA::DNN::EInitialization;
61 using TMVA::DNN::EOutputFunction;
62 using TMVA::DNN::EOptimizer;
63 
64 
65 namespace TMVA {
66 
67 
68 ////////////////////////////////////////////////////////////////////////////////
69 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key)
70 {
71  key.ToUpper();
72  std::map<TString, TString>::const_iterator it = keyValueMap.find(key);
73  if (it == keyValueMap.end()) {
74  return TString("");
75  }
76  return it->second;
77 }
78 
79 ////////////////////////////////////////////////////////////////////////////////
80 template <typename T>
81 T fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, T defaultValue);
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 template <>
85 int fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, int defaultValue)
86 {
87  TString value(fetchValueTmp(keyValueMap, key));
88  if (value == "") {
89  return defaultValue;
90  }
91  return value.Atoi();
92 }
93 
94 ////////////////////////////////////////////////////////////////////////////////
95 template <>
96 double fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, double defaultValue)
97 {
98  TString value(fetchValueTmp(keyValueMap, key));
99  if (value == "") {
100  return defaultValue;
101  }
102  return value.Atof();
103 }
104 
105 ////////////////////////////////////////////////////////////////////////////////
106 template <>
107 TString fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, TString defaultValue)
108 {
109  TString value(fetchValueTmp(keyValueMap, key));
110  if (value == "") {
111  return defaultValue;
112  }
113  return value;
114 }
115 
116 ////////////////////////////////////////////////////////////////////////////////
117 template <>
118 bool fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key, bool defaultValue)
119 {
120  TString value(fetchValueTmp(keyValueMap, key));
121  if (value == "") {
122  return defaultValue;
123  }
124 
125  value.ToUpper();
126  if (value == "TRUE" || value == "T" || value == "1") {
127  return true;
128  }
129 
130  return false;
131 }
132 
133 ////////////////////////////////////////////////////////////////////////////////
134 template <>
135 std::vector<double> fetchValueTmp(const std::map<TString, TString> &keyValueMap, TString key,
136  std::vector<double> defaultValue)
137 {
138  TString parseString(fetchValueTmp(keyValueMap, key));
139  if (parseString == "") {
140  return defaultValue;
141  }
142 
143  parseString.ToUpper();
144  std::vector<double> values;
145 
146  const TString tokenDelim("+");
147  TObjArray *tokenStrings = parseString.Tokenize(tokenDelim);
148  TIter nextToken(tokenStrings);
149  TObjString *tokenString = (TObjString *)nextToken();
150  for (; tokenString != NULL; tokenString = (TObjString *)nextToken()) {
151  std::stringstream sstr;
152  double currentValue;
153  sstr << tokenString->GetString().Data();
154  sstr >> currentValue;
155  values.push_back(currentValue);
156  }
157  return values;
158 }
159 
////////////////////////////////////////////////////////////////////////////////
/// Declare all configurable options of the DL method, together with their
/// default values and (where applicable) the list of allowed values.
void MethodDL::DeclareOptions()
{
   // Set default values for all option strings

   // Input shape, given as "depth|height|width"
   DeclareOptionRef(fInputLayoutString = "0|0|0", "InputLayout", "The Layout of the input");

   // Batch shape, given as "depth|height|width"
   DeclareOptionRef(fBatchLayoutString = "0|0|0", "BatchLayout", "The Layout of the batch");

   // Comma-separated layer specifications; fields inside one layer are
   // separated by '|' (parsed in CreateDeepNet)
   DeclareOptionRef(fLayoutString = "DENSE|(N+100)*2|SOFTSIGN,DENSE|0|LINEAR", "Layout", "Layout of the network.");

   DeclareOptionRef(fErrorStrategy = "CROSSENTROPY", "ErrorStrategy", "Loss function: Mean squared error (regression)"
                                                                      " or cross entropy (binary classification).");
   AddPreDefVal(TString("CROSSENTROPY"));
   AddPreDefVal(TString("SUMOFSQUARES"));
   AddPreDefVal(TString("MUTUALEXCLUSIVE"));

   DeclareOptionRef(fWeightInitializationString = "XAVIER", "WeightInitialization", "Weight initialization strategy");
   AddPreDefVal(TString("XAVIER"));
   AddPreDefVal(TString("XAVIERUNIFORM"));
   AddPreDefVal(TString("GAUSS"));
   AddPreDefVal(TString("UNIFORM"));
   AddPreDefVal(TString("IDENTITY"));
   AddPreDefVal(TString("ZERO"));

   DeclareOptionRef(fRandomSeed = 0, "RandomSeed", "Random seed used for weight initialization and batch shuffling");

   // Accepts a fraction (0.2 / 20%) or an absolute event count (100)
   DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
                    "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
                    "Specify as 100 to use exactly 100 events. (Default: 20%)");

   DeclareOptionRef(fArchitectureString = "CPU", "Architecture", "Which architecture to perform the training on.");
   AddPreDefVal(TString("STANDARD"));
   AddPreDefVal(TString("CPU"));
   AddPreDefVal(TString("GPU"));
   AddPreDefVal(TString("OPENCL"));
   AddPreDefVal(TString("CUDNN"));

   // Define the training strategy: blocks are separated by "|", key=value
   // pairs inside one block by "," (parsed in ProcessOptions)
   DeclareOptionRef(fTrainingStrategyString = "LearningRate=1e-1,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "BatchSize=30,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.0,"
                                              "Regularization=None,"
                                              "DropConfig=0.0,"
                                              "DropRepetitions=5"
                                              "|"
                                              "LearningRate=1e-4,"
                                              "Momentum=0.3,"
                                              "Repetitions=3,"
                                              "ConvergenceSteps=50,"
                                              "MaxEpochs=2000,"
                                              "BatchSize=20,"
                                              "TestRepetitions=7,"
                                              "WeightDecay=0.001,"
                                              "Regularization=L2,"
                                              "DropConfig=0.0+0.5+0.5,"
                                              "DropRepetitions=5,"
                                              "Multithreading=True",
                                              "TrainingStrategy", "Defines the training strategies.");
}
224 
225 ////////////////////////////////////////////////////////////////////////////////
226 void MethodDL::ProcessOptions()
227 {
228 
229  if (IgnoreEventsWithNegWeightsInTraining()) {
230  Log() << kINFO << "Will ignore negative events in training!" << Endl;
231  }
232 
233  if (fArchitectureString == "STANDARD") {
234  Log() << kINFO << "The STANDARD architecture has been deprecated. "
235  "Please use Architecture=CPU or Architecture=CPU."
236  "See the TMVA Users' Guide for instructions if you "
237  "encounter problems."
238  << Endl;
239  }
240  if (fArchitectureString == "OPENCL") {
241  Log() << kERROR << "The OPENCL architecture has not been implemented yet. "
242  "Please use Architecture=CPU or Architecture=CPU for the "
243  "time being. See the TMVA Users' Guide for instructions "
244  "if you encounter problems."
245  << Endl;
246  }
247 
248  // the architecture can now be set at runtime as an option
249 
250 
251  if (fArchitectureString == "GPU") {
252 #ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
253  Log() << kERROR << "CUDA backend not enabled. Please make sure "
254  "you have CUDA installed and it was successfully "
255  "detected by CMAKE by using -Dtmva-gpu=On "
256  << Endl;
257 #ifdef R__HAS_TMVACPU
258  fArchitectureString = "CPU";
259  Log() << kINFO << "Will now use the CPU architecture !" << Endl;
260 #else
261  fArchitectureString = "Standard";
262  Log() << kINFO << "Will now use the Standard architecture !" << Endl;
263 #endif
264 #else
265  Log() << kINFO << "Will now use the GPU architecture !" << Endl;
266 #endif
267  }
268  else if (fArchitectureString == "CUDNN") {
269 #ifndef R__HAS_TMVAGPU // case TMVA does not support GPU
270  Log() << kERROR << "CUDA+CUDNN backend not enabled. Please make sure "
271  "you have CUDNN and CUDA installed and that the GPU capability/CUDA "
272  "was successfully detected by CMAKE by using -Dtmva-gpu=On"
273  << Endl;
274 #ifdef R__HAS_TMVACPU
275  fArchitectureString = "CPU";
276  Log() << kINFO << "Will now use the CPU architecture !" << Endl;
277 #else
278  fArchitectureString = "Standard";
279  Log() << kINFO << "Will now use the Standard architecture !" << Endl;
280 #endif
281 #else
282  Log() << kINFO << "Will now use the GPU architecture !" << Endl;
283 #endif
284  }
285 
286  else if (fArchitectureString == "CPU") {
287 #ifndef R__HAS_TMVACPU // TMVA has no CPU support
288  Log() << kERROR << "Multi-core CPU backend not enabled. Please make sure "
289  "you have a BLAS implementation and it was successfully "
290  "detected by CMake as well that the imt CMake flag is set."
291  << Endl;
292 #ifdef R__HAS_TMVAGPU
293  fArchitectureString = "GPU";
294  Log() << kINFO << "Will now use the GPU architecture !" << Endl;
295 #else
296  fArchitectureString = "STANDARD";
297  Log() << kINFO << "Will now use the Standard architecture !" << Endl;
298 #endif
299 #else
300  Log() << kINFO << "Will now use the CPU architecture !" << Endl;
301 #endif
302  }
303 
304  else {
305  Log() << kINFO << "Will use the deprecated STANDARD architecture !" << Endl;
306  fArchitectureString = "STANDARD";
307  }
308 
309  // Input Layout
310  ParseInputLayout();
311  ParseBatchLayout();
312 
313  // Loss function and output.
314  fOutputFunction = EOutputFunction::kSigmoid;
315  if (fAnalysisType == Types::kClassification) {
316  if (fErrorStrategy == "SUMOFSQUARES") {
317  fLossFunction = ELossFunction::kMeanSquaredError;
318  }
319  if (fErrorStrategy == "CROSSENTROPY") {
320  fLossFunction = ELossFunction::kCrossEntropy;
321  }
322  fOutputFunction = EOutputFunction::kSigmoid;
323  } else if (fAnalysisType == Types::kRegression) {
324  if (fErrorStrategy != "SUMOFSQUARES") {
325  Log() << kWARNING << "For regression only SUMOFSQUARES is a valid "
326  << " neural net error function. Setting error function to "
327  << " SUMOFSQUARES now." << Endl;
328  }
329 
330  fLossFunction = ELossFunction::kMeanSquaredError;
331  fOutputFunction = EOutputFunction::kIdentity;
332  } else if (fAnalysisType == Types::kMulticlass) {
333  if (fErrorStrategy == "SUMOFSQUARES") {
334  fLossFunction = ELossFunction::kMeanSquaredError;
335  }
336  if (fErrorStrategy == "CROSSENTROPY") {
337  fLossFunction = ELossFunction::kCrossEntropy;
338  }
339  if (fErrorStrategy == "MUTUALEXCLUSIVE") {
340  fLossFunction = ELossFunction::kSoftmaxCrossEntropy;
341  }
342  fOutputFunction = EOutputFunction::kSoftmax;
343  }
344 
345  // Initialization
346  // the biases will be always initialized to zero
347  if (fWeightInitializationString == "XAVIER") {
348  fWeightInitialization = DNN::EInitialization::kGlorotNormal;
349  } else if (fWeightInitializationString == "XAVIERUNIFORM") {
350  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
351  } else if (fWeightInitializationString == "GAUSS") {
352  fWeightInitialization = DNN::EInitialization::kGauss;
353  } else if (fWeightInitializationString == "UNIFORM") {
354  fWeightInitialization = DNN::EInitialization::kUniform;
355  } else if (fWeightInitializationString == "ZERO") {
356  fWeightInitialization = DNN::EInitialization::kZero;
357  } else if (fWeightInitializationString == "IDENTITY") {
358  fWeightInitialization = DNN::EInitialization::kIdentity;
359  } else {
360  fWeightInitialization = DNN::EInitialization::kGlorotUniform;
361  }
362 
363  // Training settings.
364 
365  KeyValueVector_t strategyKeyValues = ParseKeyValueString(fTrainingStrategyString, TString("|"), TString(","));
366  for (auto &block : strategyKeyValues) {
367  TTrainingSettings settings;
368 
369  settings.convergenceSteps = fetchValueTmp(block, "ConvergenceSteps", 100);
370  settings.batchSize = fetchValueTmp(block, "BatchSize", 30);
371  settings.maxEpochs = fetchValueTmp(block, "MaxEpochs", 2000);
372  settings.testInterval = fetchValueTmp(block, "TestRepetitions", 7);
373  settings.weightDecay = fetchValueTmp(block, "WeightDecay", 0.0);
374  settings.learningRate = fetchValueTmp(block, "LearningRate", 1e-5);
375  settings.momentum = fetchValueTmp(block, "Momentum", 0.3);
376  settings.dropoutProbabilities = fetchValueTmp(block, "DropConfig", std::vector<Double_t>());
377 
378  TString regularization = fetchValueTmp(block, "Regularization", TString("NONE"));
379  if (regularization == "L1") {
380  settings.regularization = DNN::ERegularization::kL1;
381  } else if (regularization == "L2") {
382  settings.regularization = DNN::ERegularization::kL2;
383  } else {
384  settings.regularization = DNN::ERegularization::kNone;
385  }
386 
387  TString optimizer = fetchValueTmp(block, "Optimizer", TString("ADAM"));
388  settings.optimizerName = optimizer;
389  if (optimizer == "SGD") {
390  settings.optimizer = DNN::EOptimizer::kSGD;
391  } else if (optimizer == "ADAM") {
392  settings.optimizer = DNN::EOptimizer::kAdam;
393  } else if (optimizer == "ADAGRAD") {
394  settings.optimizer = DNN::EOptimizer::kAdagrad;
395  } else if (optimizer == "RMSPROP") {
396  settings.optimizer = DNN::EOptimizer::kRMSProp;
397  } else if (optimizer == "ADADELTA") {
398  settings.optimizer = DNN::EOptimizer::kAdadelta;
399  } else {
400  // Make Adam as default choice if the input string is
401  // incorrect.
402  settings.optimizer = DNN::EOptimizer::kAdam;
403  settings.optimizerName = "ADAM";
404  }
405 
406 
407  TString strMultithreading = fetchValueTmp(block, "Multithreading", TString("True"));
408 
409  if (strMultithreading.BeginsWith("T")) {
410  settings.multithreading = true;
411  } else {
412  settings.multithreading = false;
413  }
414 
415  fTrainingSettings.push_back(settings);
416  }
417 
418  this->SetBatchSize(fTrainingSettings.front().batchSize);
419 
420  // case inputlayout and batch layout was not given. Use default then
421  // (1, batchsize, nvariables)
422  // fInputShape[0] -> BatchSize
423  // fInputShape[1] -> InputDepth
424  // fInputShape[2] -> InputHeight
425  // fInputShape[3] -> InputWidth
426  if (fInputShape[3] == 0 && fInputShape[2] == 0 && fInputShape[1] == 0) {
427  fInputShape[1] = 1;
428  fInputShape[2] = 1;
429  fInputShape[3] = GetNVariables();
430  }
431  if (fBatchWidth == 0 && fBatchHeight == 0 && fBatchDepth == 0) {
432  if (fInputShape[2] == 1 && fInputShape[1] == 1) {
433  // case of (1, batchsize, input features)
434  fBatchDepth = 1;
435  fBatchHeight = fTrainingSettings.front().batchSize;
436  fBatchWidth = fInputShape[3];
437  }
438  else { // more general cases (e.g. for CNN)
439  fBatchDepth = fTrainingSettings.front().batchSize;
440  fBatchHeight = fInputShape[1];
441  fBatchWidth = fInputShape[3]*fInputShape[2];
442  }
443  }
444 }
445 
446 ////////////////////////////////////////////////////////////////////////////////
447 /// default initializations
448 void MethodDL::Init()
449 {
450  // Nothing to do here
451 }
452 
453 ////////////////////////////////////////////////////////////////////////////////
454 /// Parse the input layout
455 void MethodDL::ParseInputLayout()
456 {
457  // Define the delimiter
458  const TString delim("|");
459 
460  // Get the input layout string
461  TString inputLayoutString = this->GetInputLayoutString();
462 
463  // Split the input layout string
464  TObjArray *inputDimStrings = inputLayoutString.Tokenize(delim);
465  TIter nextInputDim(inputDimStrings);
466  TObjString *inputDimString = (TObjString *)nextInputDim();
467 
468  // Go through every token and save its absolute value in the shape array
469  // The first token is the batch size for easy compatibility with cudnn
470  int subDim = 1;
471  std::vector<size_t> inputShape;
472  inputShape.reserve(inputLayoutString.Length()/2 + 2);
473  inputShape.push_back(30); // Will be set by Trainingsettings, use default now
474  for (; inputDimString != nullptr; inputDimString = (TObjString *)nextInputDim()) {
475  // size_t is unsigned
476  subDim = (size_t) abs(inputDimString->GetString().Atoi());
477  // Size among unused dimensions should be set to 1 for cudnn
478  //if (subDim == 0) subDim = 1;
479  inputShape.push_back(subDim);
480  }
481 
482  this->SetInputShape(inputShape);
483 }
484 
485 ////////////////////////////////////////////////////////////////////////////////
486 /// Parse the input layout
487 void MethodDL::ParseBatchLayout()
488 {
489  // Define the delimiter
490  const TString delim("|");
491 
492  // Get the input layout string
493  TString batchLayoutString = this->GetBatchLayoutString();
494 
495  size_t batchDepth = 0;
496  size_t batchHeight = 0;
497  size_t batchWidth = 0;
498 
499  // Split the input layout string
500  TObjArray *batchDimStrings = batchLayoutString.Tokenize(delim);
501  TIter nextBatchDim(batchDimStrings);
502  TObjString *batchDimString = (TObjString *)nextBatchDim();
503  int idxToken = 0;
504 
505  for (; batchDimString != nullptr; batchDimString = (TObjString *)nextBatchDim()) {
506  switch (idxToken) {
507  case 0: // input depth
508  {
509  TString strDepth(batchDimString->GetString());
510  batchDepth = (size_t)strDepth.Atoi();
511  } break;
512  case 1: // input height
513  {
514  TString strHeight(batchDimString->GetString());
515  batchHeight = (size_t)strHeight.Atoi();
516  } break;
517  case 2: // input width
518  {
519  TString strWidth(batchDimString->GetString());
520  batchWidth = (size_t)strWidth.Atoi();
521  } break;
522  }
523  ++idxToken;
524  }
525 
526  this->SetBatchDepth(batchDepth);
527  this->SetBatchHeight(batchHeight);
528  this->SetBatchWidth(batchWidth);
529 }
530 
531 ////////////////////////////////////////////////////////////////////////////////
532 /// Create a deep net based on the layout string
533 template <typename Architecture_t, typename Layer_t>
534 void MethodDL::CreateDeepNet(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
535  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> &nets)
536 {
537  // Layer specification, layer details
538  const TString layerDelimiter(",");
539  const TString subDelimiter("|");
540 
541  TString layoutString = this->GetLayoutString();
542 
543  //std::cout << "Create Deepnet - layout string " << layoutString << "\t layers : " << deepNet.GetLayers().size() << std::endl;
544 
545  // Split layers
546  TObjArray *layerStrings = layoutString.Tokenize(layerDelimiter);
547  TIter nextLayer(layerStrings);
548  TObjString *layerString = (TObjString *)nextLayer();
549 
550 
551  for (; layerString != nullptr; layerString = (TObjString *)nextLayer()) {
552 
553  // Split layer details
554  TObjArray *subStrings = layerString->GetString().Tokenize(subDelimiter);
555  TIter nextToken(subStrings);
556  TObjString *token = (TObjString *)nextToken();
557 
558  // Determine the type of the layer
559  TString strLayerType = token->GetString();
560 
561 
562  if (strLayerType == "DENSE") {
563  ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
564  } else if (strLayerType == "CONV") {
565  ParseConvLayer(deepNet, nets, layerString->GetString(), subDelimiter);
566  } else if (strLayerType == "MAXPOOL") {
567  ParseMaxPoolLayer(deepNet, nets, layerString->GetString(), subDelimiter);
568  } else if (strLayerType == "RESHAPE") {
569  ParseReshapeLayer(deepNet, nets, layerString->GetString(), subDelimiter);
570  } else if (strLayerType == "BNORM") {
571  ParseBatchNormLayer(deepNet, nets, layerString->GetString(), subDelimiter);
572  } else if (strLayerType == "RNN") {
573  ParseRnnLayer(deepNet, nets, layerString->GetString(), subDelimiter);
574  // } else if (strLayerType == "LSTM") {
575  // Log() << kError << "LSTM Layer is not yet fully implemented" << Endl;
576  // //ParseLstmLayer(deepNet, nets, layerString->GetString(), subDelimiter);
577  // break;
578  } else {
579  // no type of layer specified - assume is dense layer as in old DNN interface
580  ParseDenseLayer(deepNet, nets, layerString->GetString(), subDelimiter);
581  }
582  }
583 }
584 
585 ////////////////////////////////////////////////////////////////////////////////
586 /// Pases the layer string and creates the appropriate dense layer
587 template <typename Architecture_t, typename Layer_t>
588 void MethodDL::ParseDenseLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
589  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
590  TString delim)
591 {
592  int width = 0;
593  EActivationFunction activationFunction = EActivationFunction::kTanh;
594 
595  // this return number of input variables for the method
596  // it can be used to deduce width of dense layer if specified as N+10
597  // where N is the number of input variables
598  const size_t inputSize = GetNvar();
599 
600  // Split layer details
601  TObjArray *subStrings = layerString.Tokenize(delim);
602  TIter nextToken(subStrings);
603  TObjString *token = (TObjString *)nextToken();
604  int idxToken = 0;
605 
606  // loop on the tokens
607  // order of sepcifying width and activation function is not relevant
608  // both 100|TANH and TANH|100 are valid cases
609  for (; token != nullptr; token = (TObjString *)nextToken()) {
610  idxToken++;
611  // try a match with the activation function
612  TString strActFnc(token->GetString());
613  // if first token defines the layer type- skip it
614  if (strActFnc =="DENSE") continue;
615 
616  if (strActFnc == "RELU") {
617  activationFunction = DNN::EActivationFunction::kRelu;
618  } else if (strActFnc == "TANH") {
619  activationFunction = DNN::EActivationFunction::kTanh;
620  } else if (strActFnc == "SYMMRELU") {
621  activationFunction = DNN::EActivationFunction::kSymmRelu;
622  } else if (strActFnc == "SOFTSIGN") {
623  activationFunction = DNN::EActivationFunction::kSoftSign;
624  } else if (strActFnc == "SIGMOID") {
625  activationFunction = DNN::EActivationFunction::kSigmoid;
626  } else if (strActFnc == "LINEAR") {
627  activationFunction = DNN::EActivationFunction::kIdentity;
628  } else if (strActFnc == "GAUSS") {
629  activationFunction = DNN::EActivationFunction::kGauss;
630  } else if (width == 0) {
631  // no match found try to parse as text showing the width
632  // support for input a formula where the variable 'x' is 'N' in the string
633  // use TFormula for the evaluation
634  TString strNumNodes = strActFnc;
635  // number of nodes
636  TString strN("x");
637  strNumNodes.ReplaceAll("N", strN);
638  strNumNodes.ReplaceAll("n", strN);
639  TFormula fml("tmp", strNumNodes);
640  width = fml.Eval(inputSize);
641  }
642 
643  }
644  // avoid zero width. assume is 1
645  if (width == 0) width = 1;
646 
647  // Add the dense layer, initialize the weights and biases and copy
648  TDenseLayer<Architecture_t> *denseLayer = deepNet.AddDenseLayer(width, activationFunction);
649  denseLayer->Initialize();
650 
651  // add same layer to fNet
652  if (fBuildNet) fNet->AddDenseLayer(width, activationFunction);
653 
654  //TDenseLayer<Architecture_t> *copyDenseLayer = new TDenseLayer<Architecture_t>(*denseLayer);
655 
656  // add the copy to all slave nets
657  //for (size_t i = 0; i < nets.size(); i++) {
658  // nets[i].AddDenseLayer(copyDenseLayer);
659  //}
660 
661  // check compatibility of added layer
662  // for a dense layer input should be 1 x 1 x DxHxW
663 }
664 
665 ////////////////////////////////////////////////////////////////////////////////
666 /// Pases the layer string and creates the appropriate convolutional layer
667 template <typename Architecture_t, typename Layer_t>
668 void MethodDL::ParseConvLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
669  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
670  TString delim)
671 {
672  int depth = 0;
673  int fltHeight = 0;
674  int fltWidth = 0;
675  int strideRows = 0;
676  int strideCols = 0;
677  int zeroPadHeight = 0;
678  int zeroPadWidth = 0;
679  EActivationFunction activationFunction = EActivationFunction::kTanh;
680 
681  // Split layer details
682  TObjArray *subStrings = layerString.Tokenize(delim);
683  TIter nextToken(subStrings);
684  TObjString *token = (TObjString *)nextToken();
685  int idxToken = 0;
686 
687  for (; token != nullptr; token = (TObjString *)nextToken()) {
688  switch (idxToken) {
689  case 1: // depth
690  {
691  TString strDepth(token->GetString());
692  depth = strDepth.Atoi();
693  } break;
694  case 2: // filter height
695  {
696  TString strFltHeight(token->GetString());
697  fltHeight = strFltHeight.Atoi();
698  } break;
699  case 3: // filter width
700  {
701  TString strFltWidth(token->GetString());
702  fltWidth = strFltWidth.Atoi();
703  } break;
704  case 4: // stride in rows
705  {
706  TString strStrideRows(token->GetString());
707  strideRows = strStrideRows.Atoi();
708  } break;
709  case 5: // stride in cols
710  {
711  TString strStrideCols(token->GetString());
712  strideCols = strStrideCols.Atoi();
713  } break;
714  case 6: // zero padding height
715  {
716  TString strZeroPadHeight(token->GetString());
717  zeroPadHeight = strZeroPadHeight.Atoi();
718  } break;
719  case 7: // zero padding width
720  {
721  TString strZeroPadWidth(token->GetString());
722  zeroPadWidth = strZeroPadWidth.Atoi();
723  } break;
724  case 8: // activation function
725  {
726  TString strActFnc(token->GetString());
727  if (strActFnc == "RELU") {
728  activationFunction = DNN::EActivationFunction::kRelu;
729  } else if (strActFnc == "TANH") {
730  activationFunction = DNN::EActivationFunction::kTanh;
731  } else if (strActFnc == "SYMMRELU") {
732  activationFunction = DNN::EActivationFunction::kSymmRelu;
733  } else if (strActFnc == "SOFTSIGN") {
734  activationFunction = DNN::EActivationFunction::kSoftSign;
735  } else if (strActFnc == "SIGMOID") {
736  activationFunction = DNN::EActivationFunction::kSigmoid;
737  } else if (strActFnc == "LINEAR") {
738  activationFunction = DNN::EActivationFunction::kIdentity;
739  } else if (strActFnc == "GAUSS") {
740  activationFunction = DNN::EActivationFunction::kGauss;
741  }
742  } break;
743  }
744  ++idxToken;
745  }
746 
747  // Add the convolutional layer, initialize the weights and biases and copy
748  TConvLayer<Architecture_t> *convLayer = deepNet.AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
749  zeroPadHeight, zeroPadWidth, activationFunction);
750  convLayer->Initialize();
751 
752  // Add same layer to fNet
753  if (fBuildNet) fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
754  zeroPadHeight, zeroPadWidth, activationFunction);
755 
756  //TConvLayer<Architecture_t> *copyConvLayer = new TConvLayer<Architecture_t>(*convLayer);
757 
758  //// add the copy to all slave nets
759  //for (size_t i = 0; i < nets.size(); i++) {
760  // nets[i].AddConvLayer(copyConvLayer);
761  //}
762 }
763 
764 ////////////////////////////////////////////////////////////////////////////////
765 /// Pases the layer string and creates the appropriate max pool layer
766 template <typename Architecture_t, typename Layer_t>
767 void MethodDL::ParseMaxPoolLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
768  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
769  TString delim)
770 {
771 
772  int filterHeight = 0;
773  int filterWidth = 0;
774  int strideRows = 0;
775  int strideCols = 0;
776 
777  // Split layer details
778  TObjArray *subStrings = layerString.Tokenize(delim);
779  TIter nextToken(subStrings);
780  TObjString *token = (TObjString *)nextToken();
781  int idxToken = 0;
782 
783  for (; token != nullptr; token = (TObjString *)nextToken()) {
784  switch (idxToken) {
785  case 1: // filter height
786  {
787  TString strFrmHeight(token->GetString());
788  filterHeight = strFrmHeight.Atoi();
789  } break;
790  case 2: // filter width
791  {
792  TString strFrmWidth(token->GetString());
793  filterWidth = strFrmWidth.Atoi();
794  } break;
795  case 3: // stride in rows
796  {
797  TString strStrideRows(token->GetString());
798  strideRows = strStrideRows.Atoi();
799  } break;
800  case 4: // stride in cols
801  {
802  TString strStrideCols(token->GetString());
803  strideCols = strStrideCols.Atoi();
804  } break;
805  }
806  ++idxToken;
807  }
808 
809  // Add the Max pooling layer
810  // TMaxPoolLayer<Architecture_t> *maxPoolLayer =
811  deepNet.AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
812 
813  // Add the same layer to fNet
814  if (fBuildNet) fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
815 
816 
817  //TMaxPoolLayer<Architecture_t> *copyMaxPoolLayer = new TMaxPoolLayer<Architecture_t>(*maxPoolLayer);
818 
819  //// add the copy to all slave nets
820  //for (size_t i = 0; i < nets.size(); i++) {
821  // nets[i].AddMaxPoolLayer(copyMaxPoolLayer);
822  //}
823 }
824 
825 ////////////////////////////////////////////////////////////////////////////////
826 /// Pases the layer string and creates the appropriate reshape layer
827 template <typename Architecture_t, typename Layer_t>
828 void MethodDL::ParseReshapeLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
829  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
830  TString delim)
831 {
832  int depth = 0;
833  int height = 0;
834  int width = 0;
835  bool flattening = false;
836 
837  // Split layer details
838  TObjArray *subStrings = layerString.Tokenize(delim);
839  TIter nextToken(subStrings);
840  TObjString *token = (TObjString *)nextToken();
841  int idxToken = 0;
842 
843  for (; token != nullptr; token = (TObjString *)nextToken()) {
844  if (token->GetString() == "FLAT") idxToken=4;
845  switch (idxToken) {
846  case 1: {
847  TString strDepth(token->GetString());
848  depth = strDepth.Atoi();
849  } break;
850  case 2: // height
851  {
852  TString strHeight(token->GetString());
853  height = strHeight.Atoi();
854  } break;
855  case 3: // width
856  {
857  TString strWidth(token->GetString());
858  width = strWidth.Atoi();
859  } break;
860  case 4: // flattening
861  {
862  TString flat(token->GetString());
863  if (flat == "FLAT") {
864  flattening = true;
865  }
866  } break;
867  }
868  ++idxToken;
869  }
870 
871  // Add the reshape layer
872  // TReshapeLayer<Architecture_t> *reshapeLayer =
873  deepNet.AddReshapeLayer(depth, height, width, flattening);
874 
875  // Add the same layer to fNet
876  if (fBuildNet) fNet->AddReshapeLayer(depth, height, width, flattening);
877 
878  //TReshapeLayer<Architecture_t> *copyReshapeLayer = new TReshapeLayer<Architecture_t>(*reshapeLayer);
879 
880  //// add the copy to all slave nets
881  //for (size_t i = 0; i < nets.size(); i++) {
882  // nets[i].AddReshapeLayer(copyReshapeLayer);
883  //}
884 }
885 
886 ////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate batch normalization layer
888 template <typename Architecture_t, typename Layer_t>
889 void MethodDL::ParseBatchNormLayer(DNN::TDeepNet<Architecture_t, Layer_t> &deepNet,
890  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
891  TString delim)
892 {
893 
894  // default values
895  double momentum = -1; //0.99;
896  double epsilon = 0.0001;
897 
898  // Split layer details
899  TObjArray *subStrings = layerString.Tokenize(delim);
900  TIter nextToken(subStrings);
901  TObjString *token = (TObjString *)nextToken();
902  int idxToken = 0;
903 
904  for (; token != nullptr; token = (TObjString *)nextToken()) {
905  switch (idxToken) {
906  case 1: {
907  momentum = std::atof(token->GetString().Data());
908  } break;
909  case 2: // height
910  {
911  epsilon = std::atof(token->GetString().Data());
912  } break;
913  }
914  ++idxToken;
915  }
916 
917  // Add the batch norm layer
918  //
919  auto layer = deepNet.AddBatchNormLayer(momentum, epsilon);
920  layer->Initialize();
921 
922  // Add the same layer to fNet
923  if (fBuildNet) fNet->AddBatchNormLayer(momentum, epsilon);
924 
925 }
926 
927 ////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string and creates the appropriate recurrent (RNN) layer
929 template <typename Architecture_t, typename Layer_t>
930 void MethodDL::ParseRnnLayer(DNN::TDeepNet<Architecture_t, Layer_t> & deepNet,
931  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets */, TString layerString,
932  TString delim)
933 {
934  // int depth = 0;
935  int stateSize = 0;
936  int inputSize = 0;
937  int timeSteps = 0;
938  bool rememberState = false;
939 
940  // Split layer details
941  TObjArray *subStrings = layerString.Tokenize(delim);
942  TIter nextToken(subStrings);
943  TObjString *token = (TObjString *)nextToken();
944  int idxToken = 0;
945 
946  for (; token != nullptr; token = (TObjString *)nextToken()) {
947  switch (idxToken) {
948  case 1: // state size
949  {
950  TString strstateSize(token->GetString());
951  stateSize = strstateSize.Atoi();
952  } break;
953  case 2: // input size
954  {
955  TString strinputSize(token->GetString());
956  inputSize = strinputSize.Atoi();
957  } break;
958  case 3: // time steps
959  {
960  TString strtimeSteps(token->GetString());
961  timeSteps = strtimeSteps.Atoi();
962  }
963  case 4: // remember state (1 or 0)
964  {
965  TString strrememberState(token->GetString());
966  rememberState = (bool) strrememberState.Atoi();
967  } break;
968  }
969  ++idxToken;
970  }
971 
972  // Add the recurrent layer, initialize the weights and biases and copy
973  TBasicRNNLayer<Architecture_t> *basicRNNLayer = deepNet.AddBasicRNNLayer(stateSize, inputSize,
974  timeSteps, rememberState);
975  basicRNNLayer->Initialize();
976 
977  // Add same layer to fNet
978  if (fBuildNet) fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
979 
980  //TBasicRNNLayer<Architecture_t> *copyRNNLayer = new TBasicRNNLayer<Architecture_t>(*basicRNNLayer);
981 
982  //// add the copy to all slave nets
983  //for (size_t i = 0; i < nets.size(); i++) {
984  // nets[i].AddBasicRNNLayer(copyRNNLayer);
985  //}
986 }
987 
988 ////////////////////////////////////////////////////////////////////////////////
/// Parses the layer string for an LSTM layer (not implemented yet)
990 template <typename Architecture_t, typename Layer_t>
991 void MethodDL::ParseLstmLayer(DNN::TDeepNet<Architecture_t, Layer_t> & /*deepNet*/,
992  std::vector<DNN::TDeepNet<Architecture_t, Layer_t>> & /*nets*/, TString layerString,
993  TString delim)
994 {
995  // Split layer details
996  TObjArray *subStrings = layerString.Tokenize(delim);
997  TIter nextToken(subStrings);
998  TObjString *token = (TObjString *)nextToken();
999  int idxToken = 0;
1000 
1001  for (; token != nullptr; token = (TObjString *)nextToken()) {
1002  switch (idxToken) {
1003  }
1004  ++idxToken;
1005  }
1006 }
1007 
1008 ////////////////////////////////////////////////////////////////////////////////
1009 /// Standard constructor.
MethodDL::MethodDL(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
   : MethodBase(jobName, Types::kDL, methodTitle, theData, theOption), fInputShape(4,0),
     fBatchHeight(), fBatchWidth(), fRandomSeed(0), fWeightInitialization(),
     fOutputFunction(), fLossFunction(), fInputLayoutString(), fBatchLayoutString(),
     fLayoutString(), fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here: all members are default/zero-initialized above;
   // the actual configuration is presumably filled in later when the method
   // options are processed (the layout/strategy strings start empty).
}
1020 
1021 ////////////////////////////////////////////////////////////////////////////////
1022 /// Constructor from a weight file.
MethodDL::MethodDL(DataSetInfo &theData, const TString &theWeightFile)
   : MethodBase(Types::kDL, theData, theWeightFile), fInputShape(4,0), fBatchHeight(),
     fBatchWidth(), fRandomSeed(0), fWeightInitialization(), fOutputFunction(),
     fLossFunction(), fInputLayoutString(), fBatchLayoutString(), fLayoutString(),
     fErrorStrategy(), fTrainingStrategyString(), fWeightInitializationString(),
     fArchitectureString(), fResume(false), fBuildNet(true), fTrainingSettings(),
     fXInput()
{
   // Nothing to do here: members mirror the standard constructor's defaults;
   // the state is presumably restored later from the weight file.
}
1033 
1034 ////////////////////////////////////////////////////////////////////////////////
1035 /// Destructor.
1036 MethodDL::~MethodDL()
1037 {
1038  // Nothing to do here
1039 }
1040 
1041 ////////////////////////////////////////////////////////////////////////////////
1042 /// Parse key value pairs in blocks -> return vector of blocks with map of key value pairs.
1043 auto MethodDL::ParseKeyValueString(TString parseString, TString blockDelim, TString tokenDelim) -> KeyValueVector_t
1044 {
1045  // remove empty spaces
1046  parseString.ReplaceAll(" ","");
1047  KeyValueVector_t blockKeyValues;
1048  const TString keyValueDelim("=");
1049 
1050  TObjArray *blockStrings = parseString.Tokenize(blockDelim);
1051  TIter nextBlock(blockStrings);
1052  TObjString *blockString = (TObjString *)nextBlock();
1053 
1054  for (; blockString != nullptr; blockString = (TObjString *)nextBlock()) {
1055  blockKeyValues.push_back(std::map<TString, TString>());
1056  std::map<TString, TString> &currentBlock = blockKeyValues.back();
1057 
1058  TObjArray *subStrings = blockString->GetString().Tokenize(tokenDelim);
1059  TIter nextToken(subStrings);
1060  TObjString *token = (TObjString *)nextToken();
1061 
1062  for (; token != nullptr; token = (TObjString *)nextToken()) {
1063  TString strKeyValue(token->GetString());
1064  int delimPos = strKeyValue.First(keyValueDelim.Data());
1065  if (delimPos <= 0) continue;
1066 
1067  TString strKey = TString(strKeyValue(0, delimPos));
1068  strKey.ToUpper();
1069  TString strValue = TString(strKeyValue(delimPos + 1, strKeyValue.Length()));
1070 
1071  strKey.Strip(TString::kBoth, ' ');
1072  strValue.Strip(TString::kBoth, ' ');
1073 
1074  currentBlock.insert(std::make_pair(strKey, strValue));
1075  }
1076  }
1077  return blockKeyValues;
1078 }
1079 
1080 ////////////////////////////////////////////////////////////////////////////////
1081 /// What kind of analysis type can handle the CNN
1082 Bool_t MethodDL::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/)
1083 {
1084  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
1085  if (type == Types::kMulticlass) return kTRUE;
1086  if (type == Types::kRegression) return kTRUE;
1087 
1088  return kFALSE;
1089 }
1090 
1091 ////////////////////////////////////////////////////////////////////////////////
1092 /// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
1093 /// 100 etc.
1094 /// - 20% and 0.2 selects 20% of the training set as validation data.
1095 /// - 100 selects 100 events as the validation data.
1096 ///
1097 /// @return number of samples in validation set
1098 ///
UInt_t TMVA::MethodDL::GetNumValidationSamples()
{
   Int_t nValidationSamples = 0;
   UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();

   // Parsing + Validation
   // --------------------
   if (fNumValidationString.EndsWith("%")) {
      // Relative spec. format 20%
      TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));

      if (intValStr.IsFloat()) {
         // NOTE(review): Atof is applied to the full string incl. '%'; it
         // presumably stops at the first non-numeric character — verify.
         Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
               << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
      }
   } else if (fNumValidationString.IsFloat()) {
      Double_t valSizeAsDouble = fNumValidationString.Atof();

      if (valSizeAsDouble < 1.0) {
         // Relative spec. format 0.2
         nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
      } else {
         // Absolute spec format 100 or 100.0
         nValidationSamples = valSizeAsDouble;
      }
   } else {
      Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
            << Endl;
   }

   // Value validation
   // ----------------
   // Each failed check is fatal: an unusable validation size would otherwise
   // silently corrupt the train/validation split in TrainDeepNet.
   if (nValidationSamples < 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
   }

   if (nValidationSamples == 0) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
   }

   if (nValidationSamples >= (Int_t)trainingSetSize) {
      Log() << kFATAL << "Validation size \"" << fNumValidationString
            << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
   }

   return nValidationSamples;
}
1149 
1150 
1151 ////////////////////////////////////////////////////////////////////////////////
1152 /// Implementation of architecture specific train method
1153 ///
template <typename Architecture_t>
void MethodDL::TrainDeepNet()
{

   using Scalar_t = typename Architecture_t::Scalar_t;
   using Layer_t = TMVA::DNN::VGeneralLayer<Architecture_t>;
   using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t, Layer_t>;
   using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;

   bool debug = Log().GetMinType() == kDEBUG;

   // set the random seed for weight initialization
   Architecture_t::SetRandomSeed(fRandomSeed);

   // split training data in training and validation data
   // and determine the number of training and testing examples
   size_t nValidationSamples = GetNumValidationSamples();
   size_t nTrainingSamples = GetEventCollection(Types::kTraining).size() - nValidationSamples;

   // validation events are taken from the tail of the training collection
   const std::vector<TMVA::Event *> &allData = GetEventCollection(Types::kTraining);
   const std::vector<TMVA::Event *> eventCollectionTraining{allData.begin(), allData.begin() + nTrainingSamples};
   const std::vector<TMVA::Event *> eventCollectionValidation{allData.begin() + nTrainingSamples, allData.end()};

   size_t trainingPhase = 1;

   // one full training run per configured training strategy ("phase")
   for (TTrainingSettings &settings : this->GetTrainingSettings()) {

      size_t nThreads = 1; // FIXME threads are hard coded to 1, no use of slave threads or multi-threading

      // After the processing of the options, initialize the master deep net
      size_t batchSize = settings.batchSize;
      this->SetBatchSize(batchSize);
      // Should be replaced by actual implementation. No support for this now.
      size_t inputDepth = this->GetInputDepth();
      size_t inputHeight = this->GetInputHeight();
      size_t inputWidth = this->GetInputWidth();
      size_t batchDepth = this->GetBatchDepth();
      size_t batchHeight = this->GetBatchHeight();
      size_t batchWidth = this->GetBatchWidth();
      ELossFunction J = this->GetLossFunction();
      EInitialization I = this->GetWeightInitialization();
      ERegularization R = settings.regularization;
      EOptimizer O = settings.optimizer;
      Scalar_t weightDecay = settings.weightDecay;

      // Batch size should be included in batch layout as well. There are two possibilities:
      //  1. Batch depth = batch size: input tensors are (batch_size x d1 x d2).
      //     This is the case e.g. if the first layer is a conv layer:
      //     d1 = image depth, d2 = image width x image height
      //  2. Batch depth = 1, batch height = batch size, batch width = dim of input features.
      //     This should be the case if the first layer is a Dense layer and the
      //     input tensor must be ( 1 x batch_size x input_features )

      if (batchDepth != batchSize && batchDepth > 1) {
         Error("Train","Given batch depth of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchDepth,batchSize);
         return;
      }
      if (batchDepth == 1 && batchSize > 1 && batchSize != batchHeight ) {
         Error("Train","Given batch height of %zu (specified in BatchLayout) should be equal to given batch size %zu",batchHeight,batchSize);
         return;
      }

      // check also that input layout compatible with batch layout
      bool badLayout = false;
      // case batch depth == batch size
      if (batchDepth == batchSize)
         badLayout = ( inputDepth * inputHeight * inputWidth != batchHeight * batchWidth ) ;
      // case batch Height is batch size
      if (batchHeight == batchSize && batchDepth == 1)
         badLayout |= ( inputDepth * inputHeight * inputWidth != batchWidth);
      if (badLayout) {
         Error("Train","Given input layout %zu x %zu x %zu is not compatible with batch layout %zu x %zu x %zu ",
               inputDepth,inputHeight,inputWidth,batchDepth,batchHeight,batchWidth);
         return;
      }

      // check batch size is compatible with number of events
      if (nTrainingSamples < settings.batchSize || nValidationSamples < settings.batchSize) {
         Log() << kFATAL << "Number of samples in the datasets are train: ("
               << nTrainingSamples << ") test: (" << nValidationSamples
               << "). One of these is smaller than the batch size of "
               << settings.batchSize << ". Please increase the batch"
               << " size to be at least the same size as the smallest"
               << " of them." << Endl;
      }

      DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);

      // create a copy of DeepNet for evaluating but with batch size = 1
      // fNet is the saved network and will be with CPU or Reference architecture
      if (trainingPhase == 1) {
         fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(1, inputDepth, inputHeight, inputWidth, batchDepth,
                                                                 batchHeight, batchWidth, J, I, R, weightDecay));
         fBuildNet = true;
      }
      else
         fBuildNet = false;

      // Initialize the vector of slave nets
      std::vector<DeepNet_t> nets{};
      nets.reserve(nThreads);
      for (size_t i = 0; i < nThreads; i++) {
         // create a copies of the master deep net
         nets.push_back(deepNet);
      }

      // Add all appropriate layers to deepNet and (if fBuildNet is true) also to fNet
      CreateDeepNet(deepNet, nets);

      // set dropout probabilities
      // use convention to store in the layer 1.- dropout probabilities
      std::vector<Double_t> dropoutVector(settings.dropoutProbabilities);
      for (auto & p : dropoutVector) {
         p = 1.0 - p;
      }
      deepNet.SetDropoutProbabilities(dropoutVector);

      if (trainingPhase > 1) {
         // copy initial weights from fNet to deepnet (continue from previous phase)
         for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
            deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
         }
      }

      // when fNet is built create also input matrix that will be used to evaluate it
      if (fBuildNet) {
         //int n1 = batchHeight;
         //int n2 = batchWidth;
         // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
         //if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) n1 = fNet->GetBatchSize();
         //fXInput = TensorImpl_t(1,n1,n2);
         fXInput = ArchitectureImpl_t::CreateTensor(fNet->GetBatchSize(), GetInputDepth(), GetInputHeight(), GetInputWidth() );
         if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
            fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth() );
         fXInputBuffer = HostBufferImpl_t( fXInput.GetSize() );

         // create pointer to output matrix used for the predictions
         fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );

         // print the created network
         Log() << "***** Deep Learning Network *****" << Endl;
         if (Log().GetMinType() <= kINFO)
            deepNet.Print();
      }
      Log() << "Using " << nTrainingSamples << " events for training and " << nValidationSamples << " for testing" << Endl;

      // Loading the training and validation datasets
      TMVAInput_t trainingTuple = std::tie(eventCollectionTraining, DataInfo());
      TensorDataLoader_t trainingData(trainingTuple, nTrainingSamples, batchSize,
                                      {inputDepth, inputHeight, inputWidth},
                                      {deepNet.GetBatchDepth(), deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
                                      deepNet.GetOutputWidth(), nThreads);

      TMVAInput_t validationTuple = std::tie(eventCollectionValidation, DataInfo());
      TensorDataLoader_t validationData(validationTuple, nValidationSamples, batchSize,
                                        {inputDepth, inputHeight, inputWidth},
                                        { deepNet.GetBatchDepth(),deepNet.GetBatchHeight(), deepNet.GetBatchWidth()} ,
                                        deepNet.GetOutputWidth(), nThreads);

      // do an evaluation of the network to compute initial minimum test error

      Bool_t includeRegularization = (R != DNN::ERegularization::kNone);

      Double_t minValError = 0.0;
      Log() << "Compute initial loss on the validation data " << Endl;
      for (auto batch : validationData) {
         auto inputTensor = batch.GetInput();
         auto outputMatrix = batch.GetOutput();
         auto weights = batch.GetWeights();

         // should we apply dropout to the loss ??
         minValError += deepNet.Loss(inputTensor, outputMatrix, weights, false, includeRegularization);
      }
      // add Regularization term
      Double_t regzTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
      minValError /= (Double_t)(nValidationSamples / settings.batchSize);
      minValError += regzTerm;

      // create a pointer to base class VOptimizer
      std::unique_ptr<DNN::VOptimizer<Architecture_t, Layer_t, DeepNet_t>> optimizer;

      // initialize the base class pointer with the corresponding derived class object.
      switch (O) {

      case EOptimizer::kSGD:
         optimizer = std::unique_ptr<DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TSGD<Architecture_t, Layer_t, DeepNet_t>(settings.learningRate, deepNet, settings.momentum));
         break;

      case EOptimizer::kAdam:
         optimizer = std::unique_ptr<DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdam<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;

      case EOptimizer::kAdagrad:
         optimizer = std::unique_ptr<DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdagrad<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;

      case EOptimizer::kRMSProp:
         optimizer = std::unique_ptr<DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TRMSProp<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate, settings.momentum));
         break;

      case EOptimizer::kAdadelta:
         optimizer = std::unique_ptr<DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>>(
            new DNN::TAdadelta<Architecture_t, Layer_t, DeepNet_t>(deepNet, settings.learningRate));
         break;
      }

      // Initialize the vector of batches, one batch for one slave network
      std::vector<TTensorBatch<Architecture_t>> batches{};

      bool converged = false;
      size_t convergenceCount = 0;
      size_t batchesInEpoch = nTrainingSamples / deepNet.GetBatchSize();

      // start measuring
      std::chrono::time_point<std::chrono::system_clock> tstart, tend;
      tstart = std::chrono::system_clock::now();

      Log() << "Training phase " << trainingPhase << " of " << this->GetTrainingSettings().size() << ": "
            << " Optimizer " << settings.optimizerName
            << " Learning rate = " << settings.learningRate
            << " regularization " << (char) settings.regularization
            << " minimum error = " << minValError
            << Endl;
      if (!fInteractive) {
         std::string separator(62, '-');
         Log() << separator << Endl;
         Log() << std::setw(10) << "Epoch"
               << " | " << std::setw(12) << "Train Err." << std::setw(12) << "Val. Err."
               << std::setw(12) << "t(s)/epoch" << std::setw(12) << "t(s)/Loss"
               << std::setw(12) << "nEvents/s"
               << std::setw(12) << "Conv. Steps" << Endl;
         Log() << separator << Endl;
      }

      // set up generator for shuffling the batches
      // if seed is zero we have always a different order in the batches
      size_t shuffleSeed = 0;
      if (fRandomSeed != 0) shuffleSeed = fRandomSeed + trainingPhase;
      RandomGenerator<TRandom3> rng(shuffleSeed);

      // print weights before
      if (fBuildNet && debug) {
         Log() << "Initial Deep Net Weights " << Endl;
         auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
         for (size_t l = 0; l < weights_tensor.size(); ++l)
            weights_tensor[l].Print();
         auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
         bias_tensor[0].Print();
      }

      Log() << " Start epoch iteration ..." << Endl;
      bool debugFirstEpoch = false;
      bool computeLossInTraining = true; // compute loss in training or at test time
      // main epoch loop: runs until convergence or maxEpochs is reached
      while (!converged) {
         optimizer->IncrementGlobalStep();
         trainingData.Shuffle(rng);

         // execute all epochs
         //for (size_t i = 0; i < batchesInEpoch; i += nThreads) {

         Double_t trainingError = 0;
         for (size_t i = 0; i < batchesInEpoch; ++i ) {
            // Clean and load new batches, one batch for one slave net
            //batches.clear();
            //batches.reserve(nThreads);
            //for (size_t j = 0; j < nThreads; j++) {
            //   batches.push_back(trainingData.GetTensorBatch());
            //}
            if (debugFirstEpoch) std::cout << "\n\n----- batch # " << i << "\n\n";

            auto my_batch = trainingData.GetTensorBatch();

            if (debugFirstEpoch)
               std::cout << "got batch data - doing forward \n";

#ifdef DEBUG

            Architecture_t::PrintTensor(my_batch.GetInput(),"input tensor",true);
            typename Architecture_t::Tensor_t tOut(my_batch.GetOutput());
            typename Architecture_t::Tensor_t tW(my_batch.GetWeights());
            Architecture_t::PrintTensor(tOut,"label tensor",true) ;
            Architecture_t::PrintTensor(tW,"weight tensor",true) ;
#endif

            deepNet.Forward(my_batch.GetInput(), true);
            // compute also loss (accumulated on the fly during the forward pass)
            if (computeLossInTraining) {
               auto outputMatrix = my_batch.GetOutput();
               auto weights = my_batch.GetWeights();
               trainingError += deepNet.Loss(outputMatrix, weights, false);
            }

            if (debugFirstEpoch)
               std::cout << "- doing backward \n";

#ifdef DEBUG
            size_t nlayers = deepNet.GetLayers().size();
            for (size_t l = 0; l < nlayers; ++l) {
               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0)
                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),
                                              TString::Format("initial weights layer %d", l).Data());

               Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetOutput(),
                                           TString::Format("output tensor layer %d", l).Data());
            }
#endif

            //Architecture_t::PrintTensor(deepNet.GetLayerAt(nlayers-1)->GetOutput(),"output tensor last layer" );

            deepNet.Backward(my_batch.GetInput(), my_batch.GetOutput(), my_batch.GetWeights());

            if (debugFirstEpoch)
               std::cout << "- doing optimizer update \n";

            optimizer->Step();

#ifdef DEBUG
            std::cout << "minmimizer step - momentum " << settings.momentum << " learning rate " << optimizer->GetLearningRate() << std::endl;
            for (size_t l = 0; l < nlayers; ++l) {
               if (deepNet.GetLayerAt(l)->GetWeights().size() > 0) {
                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightsAt(0),TString::Format("weights after step layer %d",l).Data());
                  Architecture_t::PrintTensor(deepNet.GetLayerAt(l)->GetWeightGradientsAt(0),"weight gradients");
               }
            }
#endif

         }

         if (debugFirstEpoch) std::cout << "\n End batch loop - compute validation loss \n";
         //}
         debugFirstEpoch = false;
         // every testInterval epochs: evaluate validation loss, check convergence,
         // save the best configuration and print a progress row
         if ((optimizer->GetGlobalStep() % settings.testInterval) == 0) {

            std::chrono::time_point<std::chrono::system_clock> t1,t2;

            t1 = std::chrono::system_clock::now();

            // Compute validation error.

            Double_t valError = 0.0;
            bool inTraining = false;
            for (auto batch : validationData) {
               auto inputTensor = batch.GetInput();
               auto outputMatrix = batch.GetOutput();
               auto weights = batch.GetWeights();
               // should we apply dropout to the loss ??
               valError += deepNet.Loss(inputTensor, outputMatrix, weights, inTraining, includeRegularization);
            }
            // normalize loss to number of batches and add regularization term
            Double_t regTerm = (includeRegularization) ? deepNet.RegularizationTerm() : 0.0;
            valError /= (Double_t)(nValidationSamples / settings.batchSize);
            valError += regTerm;

            //Log the loss value
            fTrainHistory.AddValue("valError",optimizer->GetGlobalStep(),valError);

            t2 = std::chrono::system_clock::now();

            // checking for convergence
            if (valError < minValError) {
               convergenceCount = 0;
            } else {
               convergenceCount += settings.testInterval;
            }

            // copy configuration when reached a minimum error
            if (valError < minValError ) {
               // Copy weights from deepNet to fNet
               Log() << std::setw(10) << optimizer->GetGlobalStep()
                     << " Minimum Test error found - save the configuration " << Endl;
               for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
                  fNet->GetLayerAt(i)->CopyParameters(*deepNet.GetLayerAt(i));
               }
               minValError = valError;
            }
            else if ( minValError <= 0. )
               // initial minimum was not set (e.g. zero/negative initial loss):
               // adopt the current validation error as the reference
               minValError = valError;

            if (!computeLossInTraining) {
               trainingError = 0.0;
               // Compute training error.
               for (auto batch : trainingData) {
                  auto inputTensor = batch.GetInput();
                  auto outputMatrix = batch.GetOutput();
                  auto weights = batch.GetWeights();
                  trainingError += deepNet.Loss(inputTensor, outputMatrix, weights, false, false);
               }
            }
            // normalize loss to number of batches and add regularization term
            trainingError /= (Double_t)(nTrainingSamples / settings.batchSize);
            trainingError += regTerm;

            //Log the loss value
            fTrainHistory.AddValue("trainingError",optimizer->GetGlobalStep(),trainingError);

            // stop measuring
            tend = std::chrono::system_clock::now();

            // Compute numerical throughput.
            std::chrono::duration<double> elapsed_seconds = tend - tstart;
            std::chrono::duration<double> elapsed1 = t1-tstart;
            // time to compute training and test errors
            std::chrono::duration<double> elapsed_testing = tend-t1;

            double seconds = elapsed_seconds.count();
            double eventTime = elapsed1.count()/( batchesInEpoch * settings.testInterval * settings.batchSize);

            converged =
               convergenceCount > settings.convergenceSteps || optimizer->GetGlobalStep() >= settings.maxEpochs;

            Log() << std::setw(10) << optimizer->GetGlobalStep() << " | "
                  << std::setw(12) << trainingError
                  << std::setw(12) << valError
                  << std::setw(12) << seconds / settings.testInterval
                  << std::setw(12) << elapsed_testing.count()
                  << std::setw(12) << 1. / eventTime
                  << std::setw(12) << convergenceCount
                  << Endl;

            if (converged) {
               Log() << Endl;
            }
            tstart = std::chrono::system_clock::now();
         }

         // if (stepCount % 10 == 0 || converged) {
         if (converged && debug) {
            Log() << "Final Deep Net Weights for phase " << trainingPhase << " epoch " << optimizer->GetGlobalStep()
                  << Endl;
            auto & weights_tensor = deepNet.GetLayerAt(0)->GetWeights();
            auto & bias_tensor = deepNet.GetLayerAt(0)->GetBiases();
            for (size_t l = 0; l < weights_tensor.size(); ++l)
               weights_tensor[l].Print();
            bias_tensor[0].Print();
         }

      }

      trainingPhase++;
   } // end loop on training Phase
}
1629 
1630 ////////////////////////////////////////////////////////////////////////////////
1631 void MethodDL::Train()
1632 {
1633  if (fInteractive) {
1634  Log() << kFATAL << "Not implemented yet" << Endl;
1635  return;
1636  }
1637 
1638  // using for training same scalar type defined for the prediction
1639  if (this->GetArchitectureString() == "GPU") {
1640 #ifdef R__HAS_TMVAGPU
1641  Log() << kINFO << "Start of deep neural network training on GPU." << Endl << Endl;
1642 #ifdef R__HAS_CUDNN
1643  TrainDeepNet<DNN::TCudnn<ScalarImpl_t> >();
1644 #else
1645  TrainDeepNet<DNN::TCuda<ScalarImpl_t>>();
1646 #endif
1647 #else
1648  Log() << kFATAL << "CUDA backend not enabled. Please make sure "
1649  "you have CUDA installed and it was successfully "
1650  "detected by CMAKE."
1651  << Endl;
1652  return;
1653 #endif
1654  } else if (this->GetArchitectureString() == "OPENCL") {
1655  Log() << kFATAL << "OPENCL backend not yet supported." << Endl;
1656  return;
1657  } else if (this->GetArchitectureString() == "CPU") {
1658 #ifdef R__HAS_TMVACPU
1659  // note that number of threads used for BLAS might be different
1660  // e.g use openblas_set_num_threads(num_threads) for OPENBLAS backend
1661  Log() << kINFO << "Start of deep neural network training on CPU using (for ROOT-IMT) nthreads = "
1662  << gConfig().GetNCpu() << Endl << Endl;
1663  TrainDeepNet<DNN::TCpu<ScalarImpl_t> >();
1664 #else
1665  Log() << kFATAL << "Multi-core CPU backend not enabled. Please make sure "
1666  "you have a BLAS implementation and it was successfully "
1667  "detected by CMake as well that the imt CMake flag is set."
1668  << Endl;
1669  return;
1670 #endif
1671  } else if (this->GetArchitectureString() == "STANDARD") {
1672  Log() << kINFO << "Start of deep neural network training on the STANDARD architecture" << Endl << Endl;
1673 #if HAVE_REFERENCE
1674  TrainDeepNet<DNN::TReference<ScalarImpl_t> >();
1675 #endif
1676  }
1677  else {
1678  Log() << kFATAL << this->GetArchitectureString() <<
1679  " is not a supported archiectire for TMVA::MethodDL"
1680  << Endl;
1681  }
1682 
1683 // /// definitions for CUDA
1684 // #ifdef R__HAS_TMVAGPU // Included only if DNNCUDA flag is set.
1685 // using Architecture_t = DNN::TCuda<Double_t>;
1686 // #else
1687 // #ifdef R__HAS_TMVACPU // Included only if DNNCPU flag is set.
1688 // using Architecture_t = DNN::TCpu<Double_t>;
1689 // #else
1690 // using Architecture_t = DNN::TReference<Double_t>;
1691 // #endif
1692 // #endif
1693 }
1694 
1695 
1696 ////////////////////////////////////////////////////////////////////////////////
Double_t MethodDL::GetMvaValue(Double_t * /*errLower*/, Double_t * /*errUpper*/)
{
   // Evaluate the trained network on the current event and return the MVA
   // value (output node 0). NaN outputs are mapped to -999.

   // note that fNet should have been build with a batch size of 1

   if (!fNet || fNet->GetDepth() == 0) {
      Log() << kFATAL << "The network has not been trained and fNet is not built"
            << Endl;
   }

   // input size must be equal to 1 which is the batch size of fNet
   R__ASSERT(fNet->GetBatchSize() == 1);

   // int batchWidth = fNet->GetBatchWidth();
   // int batchDepth = fNet->GetBatchDepth();
   // int batchHeight = fNet->GetBatchHeight();
//   int noutput = fNet->GetOutputWidth();


   // get current event
   const std::vector<Float_t> &inputValues = GetEvent()->GetValues();

   size_t nVariables = GetEvent()->GetNVariables();

   // for column-major layout the tensor memory layout is HWC while for row-major it is CHW
   if (fXInput.GetLayout() == TMVA::Experimental::MemoryLayout::ColumnMajor) {
      R__ASSERT(fXInput.GetShape().size() < 4);
      size_t nc, nhw = 0;
      if (fXInput.GetShape().size() == 2) {
         // 2D case: first dimension must be the batch size (== 1 here)
         nc = fXInput.GetShape()[0];
         if (nc != 1 ) {
            ArchitectureImpl_t::PrintTensor(fXInput);
            Log() << kFATAL << "First tensor dimension should be equal to batch size, i.e. = 1"
                  << Endl;
         }
         nhw = fXInput.GetShape()[1];
      } else {
         // 3D case: channels x (height*width)
         nc = fXInput.GetCSize();
         nhw = fXInput.GetWSize();
      }
      if ( nVariables != nc * nhw) {
         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nhw
               << Endl;
      }
      for (size_t j = 0; j < nc; j++) {
         for (size_t k = 0; k < nhw; k++) {
            // note that in TMVA events images are stored as C H W while in the buffer we stored as H W C
            fXInputBuffer[ k * nc + j] = inputValues[j*nhw + k]; // transpose CHW (event) -> HWC (buffer)
         }
      }
   } else {
      // row-wise layout: the event value order already matches the tensor order
      assert(fXInput.GetShape().size() >= 4);
      size_t nc = fXInput.GetCSize();
      size_t nh = fXInput.GetHSize();
      size_t nw = fXInput.GetWSize();
      size_t n = nc * nh * nw;
      if ( nVariables != n) {
         Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
               << " n-event variables " << nVariables << " expected input tensor " << nc << " x " << nh << " x " << nw
               << Endl;
      }
      for (size_t j = 0; j < n; j++) {
         // in this case TMVA event has same order as input tensor
         fXInputBuffer[ j ] = inputValues[j]; // direct copy, no transposition needed
      }
   }
   // copy host buffer into the (possibly device-resident) input tensor
   fXInput.GetDeviceBuffer().CopyFrom( fXInputBuffer);

   // perform the prediction
   fNet->Prediction(*fYHat, fXInput, fOutputFunction);

   // return value: single output of the single-event batch
   double mvaValue = (*fYHat)(0, 0);

   // for debugging
   // NOTE(review): this debug block references n1/n2 which are not defined in
   // this function — it appears stale and would not compile if DEBUG_MVAVALUE
   // were defined; verify before enabling.
#ifdef DEBUG_MVAVALUE
   using Tensor_t = std::vector<MatrixImpl_t>;
    TMatrixF xInput(n1,n2, inputValues.data() );
    std::cout << "Input data - class " << GetEvent()->GetClass() << std::endl;
    xInput.Print();
    std::cout << "Output of DeepNet " << mvaValue << std::endl;
    auto & deepnet = *fNet;
    std::cout << "Loop on layers " << std::endl;
    for (int l = 0; l < deepnet.GetDepth(); ++l) {
       std::cout << "Layer " << l;
       const auto * layer = deepnet.GetLayerAt(l);
       const Tensor_t & layer_output = layer->GetOutput();
       layer->Print();
       std::cout << "DNN output " << layer_output.size() << std::endl;
       for (size_t i = 0; i < layer_output.size(); ++i) {
#ifdef R__HAS_TMVAGPU
          //TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetDataPointer() );
          TMatrixD m = layer_output[i];
#else
          TMatrixD m(layer_output[i].GetNrows(), layer_output[i].GetNcols() , layer_output[i].GetRawDataPointer() );
#endif
          m.Print();
       }
       const Tensor_t & layer_weights = layer->GetWeights();
       std::cout << "DNN weights " << layer_weights.size() << std::endl;
       if (layer_weights.size() > 0) {
          int i = 0;
#ifdef R__HAS_TMVAGPU
          TMatrixD m = layer_weights[i];
//          TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetDataPointer() );
#else
          TMatrixD m(layer_weights[i].GetNrows(), layer_weights[i].GetNcols() , layer_weights[i].GetRawDataPointer() );
#endif
          m.Print();
       }
    }
#endif

   return (TMath::IsNaN(mvaValue)) ? -999. : mvaValue;
}
1815 ////////////////////////////////////////////////////////////////////////////////
1816 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
1817 ////////////////////////////////////////////////////////////////////////////////
1818 template <typename Architecture_t>
1819 std::vector<Double_t> MethodDL::PredictDeepNet(Long64_t firstEvt, Long64_t lastEvt, size_t batchSize, Bool_t logProgress)
1820 {
1821 
1822  // Check whether the model is setup
1823  if (!fNet || fNet->GetDepth() == 0) {
1824  Log() << kFATAL << "The network has not been trained and fNet is not built"
1825  << Endl;
1826  }
1827 
1828  // rebuild the networks
1829  this->SetBatchSize(batchSize);
1830  size_t inputDepth = this->GetInputDepth();
1831  size_t inputHeight = this->GetInputHeight();
1832  size_t inputWidth = this->GetInputWidth();
1833  size_t batchDepth = this->GetBatchDepth();
1834  size_t batchHeight = this->GetBatchHeight();
1835  size_t batchWidth = this->GetBatchWidth();
1836  ELossFunction J = fNet->GetLossFunction();
1837  EInitialization I = fNet->GetInitialization();
1838  ERegularization R = fNet->GetRegularization();
1839  Double_t weightDecay = fNet->GetWeightDecay();
1840 
1841  using DeepNet_t = TMVA::DNN::TDeepNet<Architecture_t>;
1842  using Matrix_t = typename Architecture_t::Matrix_t;
1843  using TensorDataLoader_t = TTensorDataLoader<TMVAInput_t, Architecture_t>;
1844 
1845  // create the deep neural network
1846  DeepNet_t deepNet(batchSize, inputDepth, inputHeight, inputWidth, batchDepth, batchHeight, batchWidth, J, I, R, weightDecay);
1847  std::vector<DeepNet_t> nets{};
1848  fBuildNet = false;
1849  CreateDeepNet(deepNet,nets);
1850 
1851  // copy weights from the saved fNet to the built DeepNet
1852  for (size_t i = 0; i < deepNet.GetDepth(); ++i) {
1853  deepNet.GetLayerAt(i)->CopyParameters(*fNet->GetLayerAt(i));
1854  // if (i == 0 && deepNet.GetLayerAt(0)->GetWeights().size() > 1) {
1855  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(0), "Inference: input weights");
1856  // Architecture_t::PrintTensor(deepNet.GetLayerAt(0)->GetWeightsAt(1), "Inference: state weights");
1857  // }
1858  }
1859 
1860  size_t n1 = deepNet.GetBatchHeight();
1861  size_t n2 = deepNet.GetBatchWidth();
1862  size_t n0 = deepNet.GetBatchSize();
1863  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
1864  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1) {
1865  n1 = deepNet.GetBatchSize();
1866  n0 = 1;
1867  }
1868  //this->SetBatchDepth(n0);
1869  Long64_t nEvents = lastEvt - firstEvt;
1870  TMVAInput_t testTuple = std::tie(GetEventCollection(Data()->GetCurrentType()), DataInfo());
1871  TensorDataLoader_t testData(testTuple, nEvents, batchSize, {inputDepth, inputHeight, inputWidth}, {n0, n1, n2}, deepNet.GetOutputWidth(), 1);
1872 
1873 
1874  // Tensor_t xInput;
1875  // for (size_t i = 0; i < n0; ++i)
1876  // xInput.emplace_back(Matrix_t(n1,n2));
1877 
1878  // create pointer to output matrix used for the predictions
1879  Matrix_t yHat(deepNet.GetBatchSize(), deepNet.GetOutputWidth() );
1880 
1881  // use timer
1882  Timer timer( nEvents, GetName(), kTRUE );
1883 
1884  if (logProgress)
1885  Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
1886  << "Evaluation of " << GetMethodName() << " on "
1887  << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
1888  << " sample (" << nEvents << " events)" << Endl;
1889 
1890 
1891  // eventg loop
1892  std::vector<double> mvaValues(nEvents);
1893 
1894 
1895  for ( Long64_t ievt = firstEvt; ievt < lastEvt; ievt+=batchSize) {
1896 
1897  Long64_t ievt_end = ievt + batchSize;
1898  // case of batch prediction for
1899  if (ievt_end <= lastEvt) {
1900 
1901  if (ievt == firstEvt) {
1902  Data()->SetCurrentEvent(ievt);
1903  size_t nVariables = GetEvent()->GetNVariables();
1904 
1905  if (n1 == batchSize && n0 == 1) {
1906  if (n2 != nVariables) {
1907  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1908  << " n-event variables " << nVariables << " expected input matrix " << n1 << " x " << n2
1909  << Endl;
1910  }
1911  } else {
1912  if (n1*n2 != nVariables || n0 != batchSize) {
1913  Log() << kFATAL << "Input Event variable dimensions are not compatible with the built network architecture"
1914  << " n-event variables " << nVariables << " expected input tensor " << n0 << " x " << n1 << " x " << n2
1915  << Endl;
1916  }
1917  }
1918  }
1919 
1920  auto batch = testData.GetTensorBatch();
1921  auto inputTensor = batch.GetInput();
1922 
1923  auto xInput = batch.GetInput();
1924  // make the prediction
1925  deepNet.Prediction(yHat, xInput, fOutputFunction);
1926  for (size_t i = 0; i < batchSize; ++i) {
1927  double value = yHat(i,0);
1928  mvaValues[ievt + i] = (TMath::IsNaN(value)) ? -999. : value;
1929  }
1930  }
1931  else {
1932  // case of remaining events: compute prediction by single event !
1933  for (Long64_t i = ievt; i < lastEvt; ++i) {
1934  Data()->SetCurrentEvent(i);
1935  mvaValues[i] = GetMvaValue();
1936  }
1937  }
1938  }
1939 
1940  if (logProgress) {
1941  Log() << kINFO
1942  << "Elapsed time for evaluation of " << nEvents << " events: "
1943  << timer.GetElapsedTime() << " " << Endl;
1944  }
1945 
1946  return mvaValues;
1947 }
1948 
1949 const std::vector<Float_t> & TMVA::MethodDL::GetRegressionValues()
1950 {
1951  size_t nVariables = GetEvent()->GetNVariables();
1952  MatrixImpl_t X(1, nVariables);
1953  TensorImpl_t X_vec ( 1, 1, nVariables); // needs to be really 1
1954  const Event *ev = GetEvent();
1955  const std::vector<Float_t>& inputValues = ev->GetValues();
1956  for (size_t i = 0; i < nVariables; i++) {
1957  X_vec(0,i,0) = inputValues[i]; // in case of column format !!
1958  }
1959  //X_vec.emplace_back(X);
1960 
1961  size_t nTargets = std::max(1u, ev->GetNTargets());
1962  MatrixImpl_t YHat(1, nTargets);
1963  std::vector<Float_t> output(nTargets);
1964  fNet->Prediction(YHat, X_vec, fOutputFunction);
1965 
1966  for (size_t i = 0; i < nTargets; i++)
1967  output[i] = YHat(0, i);
1968 
1969  if (fRegressionReturnVal == NULL) {
1970  fRegressionReturnVal = new std::vector<Float_t>();
1971  }
1972  fRegressionReturnVal->clear();
1973 
1974  Event * evT = new Event(*ev);
1975  for (size_t i = 0; i < nTargets; ++i) {
1976  evT->SetTarget(i, output[i]);
1977  }
1978 
1979  const Event* evT2 = GetTransformationHandler().InverseTransform(evT);
1980  for (size_t i = 0; i < nTargets; ++i) {
1981  fRegressionReturnVal->push_back(evT2->GetTarget(i));
1982  }
1983  delete evT;
1984  return *fRegressionReturnVal;
1985 }
1986 
1987 const std::vector<Float_t> & TMVA::MethodDL::GetMulticlassValues()
1988 {
1989  size_t nVariables = GetEvent()->GetNVariables();
1990  MatrixImpl_t X(1, nVariables);
1991  TensorImpl_t X_vec ( 1, 1, nVariables);
1992  MatrixImpl_t YHat(1, DataInfo().GetNClasses());
1993  if (fMulticlassReturnVal == NULL) {
1994  fMulticlassReturnVal = new std::vector<Float_t>(DataInfo().GetNClasses());
1995  }
1996 
1997  const std::vector<Float_t>& inputValues = GetEvent()->GetValues();
1998  for (size_t i = 0; i < nVariables; i++) {
1999  X_vec(0,i, 0) = inputValues[i];
2000  }
2001  //X_vec.emplace_back(X);
2002  fNet->Prediction(YHat, X_vec, fOutputFunction);
2003  for (size_t i = 0; i < (size_t) YHat.GetNcols(); i++) {
2004  (*fMulticlassReturnVal)[i] = YHat(0, i);
2005  }
2006  return *fMulticlassReturnVal;
2007 }
2008 
2009 
2010 ////////////////////////////////////////////////////////////////////////////////
2011 /// Evaluate the DeepNet on a vector of input values stored in the TMVA Event class
2012 ////////////////////////////////////////////////////////////////////////////////
std::vector<Double_t> MethodDL::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Batched evaluation entry point: clamps the requested event range,
   // chooses an evaluation batch size and dispatches to PredictDeepNet on
   // the architecture configured for this method.

   // Long64_t nEvents = Data()->GetNEvents();
   // std::vector<Double_t> v(nEvents);
   // for (Long64_t i = 0; i < nEvents; ++i) {
   //    Data()->SetCurrentEvent(i);
   //    v[i] = GetMvaValue();
   // }
   // return v;


   // clamp [firstEvt, lastEvt) to the available events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt-firstEvt;

   // use same batch size as for training (from first strategy);
   // fall back to a size-dependent default when no training settings exist
   size_t defaultEvalBatchSize = (fXInput.GetSize() > 1000) ? 100 : 1000;
   size_t batchSize = (fTrainingSettings.empty()) ? defaultEvalBatchSize : fTrainingSettings.front().batchSize;
   if ( size_t(nEvents) < batchSize ) batchSize = nEvents;

   // using for training same scalar type defined for the prediction
   if (this->GetArchitectureString() == "GPU") {
#ifdef R__HAS_TMVAGPU
      Log() << kINFO << "Evaluate deep neural network on GPU using batches with size = " << batchSize << Endl << Endl;
#ifdef R__HAS_CUDNN
      return PredictDeepNet<DNN::TCudnn<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#else
      return PredictDeepNet<DNN::TCuda<ScalarImpl_t>>(firstEvt, lastEvt, batchSize, logProgress);
#endif

      // NOTE(review): when GPU is requested but R__HAS_TMVAGPU is not
      // defined, control falls through to the misleading "STANDARD
      // architecture" error below.
#endif
   } else if (this->GetArchitectureString() == "CPU") {
//#ifdef R__HAS_TMVACPU
      Log() << kINFO << "Evaluate deep neural network on CPU using batches with size = " << batchSize << Endl << Endl;
      return PredictDeepNet<DNN::TCpu<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
//#endif
   }
   Log() << kINFO << "ERROR: STANDARD architecture is not supported anymore for MethodDL ! "
         << Endl << Endl;
// #if HAVE_REFERENCE
//    return PredictDeepNet<DNN::TReference<ScalarImpl_t> >(firstEvt, lastEvt, batchSize, logProgress);
// #else
   // unsupported architecture: return NaN for every requested event
   return std::vector<Double_t>(nEvents,TMath::QuietNaN());
//#endif

}
2060 ////////////////////////////////////////////////////////////////////////////////
2061 void MethodDL::AddWeightsXMLTo(void * parent) const
2062 {
2063  // Create the parent XML node with name "Weights"
2064  auto & xmlEngine = gTools().xmlengine();
2065  void* nn = xmlEngine.NewChild(parent, 0, "Weights");
2066 
2067  /*! Get all necessary information, in order to be able to reconstruct the net
2068  * if we read the same XML file. */
2069 
2070  // Deep Net specific info
2071  Int_t depth = fNet->GetDepth();
2072 
2073  Int_t inputDepth = fNet->GetInputDepth();
2074  Int_t inputHeight = fNet->GetInputHeight();
2075  Int_t inputWidth = fNet->GetInputWidth();
2076 
2077  Int_t batchSize = fNet->GetBatchSize();
2078 
2079  Int_t batchDepth = fNet->GetBatchDepth();
2080  Int_t batchHeight = fNet->GetBatchHeight();
2081  Int_t batchWidth = fNet->GetBatchWidth();
2082 
2083  char lossFunction = static_cast<char>(fNet->GetLossFunction());
2084  char initialization = static_cast<char>(fNet->GetInitialization());
2085  char regularization = static_cast<char>(fNet->GetRegularization());
2086 
2087  Double_t weightDecay = fNet->GetWeightDecay();
2088 
2089  // Method specific info (not sure these are needed)
2090  char outputFunction = static_cast<char>(this->GetOutputFunction());
2091  //char lossFunction = static_cast<char>(this->GetLossFunction());
2092 
2093  // Add attributes to the parent node
2094  xmlEngine.NewAttr(nn, 0, "NetDepth", gTools().StringFromInt(depth));
2095 
2096  xmlEngine.NewAttr(nn, 0, "InputDepth", gTools().StringFromInt(inputDepth));
2097  xmlEngine.NewAttr(nn, 0, "InputHeight", gTools().StringFromInt(inputHeight));
2098  xmlEngine.NewAttr(nn, 0, "InputWidth", gTools().StringFromInt(inputWidth));
2099 
2100  xmlEngine.NewAttr(nn, 0, "BatchSize", gTools().StringFromInt(batchSize));
2101  xmlEngine.NewAttr(nn, 0, "BatchDepth", gTools().StringFromInt(batchDepth));
2102  xmlEngine.NewAttr(nn, 0, "BatchHeight", gTools().StringFromInt(batchHeight));
2103  xmlEngine.NewAttr(nn, 0, "BatchWidth", gTools().StringFromInt(batchWidth));
2104 
2105  xmlEngine.NewAttr(nn, 0, "LossFunction", TString(lossFunction));
2106  xmlEngine.NewAttr(nn, 0, "Initialization", TString(initialization));
2107  xmlEngine.NewAttr(nn, 0, "Regularization", TString(regularization));
2108  xmlEngine.NewAttr(nn, 0, "OutputFunction", TString(outputFunction));
2109 
2110  gTools().AddAttr(nn, "WeightDecay", weightDecay);
2111 
2112 
2113  for (Int_t i = 0; i < depth; i++)
2114  {
2115  fNet->GetLayerAt(i) -> AddWeightsXMLTo(nn);
2116  }
2117 
2118 
2119 }
2120 
2121 ////////////////////////////////////////////////////////////////////////////////
2122 void MethodDL::ReadWeightsFromXML(void * rootXML)
2123 {
2124 
2125  auto netXML = gTools().GetChild(rootXML, "Weights");
2126  if (!netXML){
2127  netXML = rootXML;
2128  }
2129 
2130  size_t netDepth;
2131  gTools().ReadAttr(netXML, "NetDepth", netDepth);
2132 
2133  size_t inputDepth, inputHeight, inputWidth;
2134  gTools().ReadAttr(netXML, "InputDepth", inputDepth);
2135  gTools().ReadAttr(netXML, "InputHeight", inputHeight);
2136  gTools().ReadAttr(netXML, "InputWidth", inputWidth);
2137 
2138  size_t batchSize, batchDepth, batchHeight, batchWidth;
2139  gTools().ReadAttr(netXML, "BatchSize", batchSize);
2140  // use always batchsize = 1
2141  //batchSize = 1;
2142  gTools().ReadAttr(netXML, "BatchDepth", batchDepth);
2143  gTools().ReadAttr(netXML, "BatchHeight", batchHeight);
2144  gTools().ReadAttr(netXML, "BatchWidth", batchWidth);
2145 
2146  char lossFunctionChar;
2147  gTools().ReadAttr(netXML, "LossFunction", lossFunctionChar);
2148  char initializationChar;
2149  gTools().ReadAttr(netXML, "Initialization", initializationChar);
2150  char regularizationChar;
2151  gTools().ReadAttr(netXML, "Regularization", regularizationChar);
2152  char outputFunctionChar;
2153  gTools().ReadAttr(netXML, "OutputFunction", outputFunctionChar);
2154  double weightDecay;
2155  gTools().ReadAttr(netXML, "WeightDecay", weightDecay);
2156 
2157  // create the net
2158 
2159  // DeepNetCpu_t is defined in MethodDL.h
2160  this->SetInputDepth(inputDepth);
2161  this->SetInputHeight(inputHeight);
2162  this->SetInputWidth(inputWidth);
2163  this->SetBatchDepth(batchDepth);
2164  this->SetBatchHeight(batchHeight);
2165  this->SetBatchWidth(batchWidth);
2166 
2167 
2168 
2169  fNet = std::unique_ptr<DeepNetImpl_t>(new DeepNetImpl_t(batchSize, inputDepth, inputHeight, inputWidth, batchDepth,
2170  batchHeight, batchWidth,
2171  static_cast<ELossFunction>(lossFunctionChar),
2172  static_cast<EInitialization>(initializationChar),
2173  static_cast<ERegularization>(regularizationChar),
2174  weightDecay));
2175 
2176  fOutputFunction = static_cast<EOutputFunction>(outputFunctionChar);
2177 
2178 
2179  //size_t previousWidth = inputWidth;
2180  auto layerXML = gTools().xmlengine().GetChild(netXML);
2181 
2182  // loop on the layer and add them to the network
2183  for (size_t i = 0; i < netDepth; i++) {
2184 
2185  TString layerName = gTools().xmlengine().GetNodeName(layerXML);
2186 
2187  // case of dense layer
2188  if (layerName == "DenseLayer") {
2189 
2190  // read width and activation function and then we can create the layer
2191  size_t width = 0;
2192  gTools().ReadAttr(layerXML, "Width", width);
2193 
2194  // Read activation function.
2195  TString funcString;
2196  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2197  EActivationFunction func = static_cast<EActivationFunction>(funcString.Atoi());
2198 
2199 
2200  fNet->AddDenseLayer(width, func, 0.0); // no need to pass dropout probability
2201 
2202  }
2203  // Convolutional Layer
2204  else if (layerName == "ConvLayer") {
2205 
2206  // read width and activation function and then we can create the layer
2207  size_t depth = 0;
2208  gTools().ReadAttr(layerXML, "Depth", depth);
2209  size_t fltHeight, fltWidth = 0;
2210  size_t strideRows, strideCols = 0;
2211  size_t padHeight, padWidth = 0;
2212  gTools().ReadAttr(layerXML, "FilterHeight", fltHeight);
2213  gTools().ReadAttr(layerXML, "FilterWidth", fltWidth);
2214  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2215  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2216  gTools().ReadAttr(layerXML, "PaddingHeight", padHeight);
2217  gTools().ReadAttr(layerXML, "PaddingWidth", padWidth);
2218 
2219  // Read activation function.
2220  TString funcString;
2221  gTools().ReadAttr(layerXML, "ActivationFunction", funcString);
2222  EActivationFunction actFunction = static_cast<EActivationFunction>(funcString.Atoi());
2223 
2224 
2225  fNet->AddConvLayer(depth, fltHeight, fltWidth, strideRows, strideCols,
2226  padHeight, padWidth, actFunction);
2227 
2228  }
2229 
2230  // MaxPool Layer
2231  else if (layerName == "MaxPoolLayer") {
2232 
2233  // read maxpool layer info
2234  size_t filterHeight, filterWidth = 0;
2235  size_t strideRows, strideCols = 0;
2236  gTools().ReadAttr(layerXML, "FilterHeight", filterHeight);
2237  gTools().ReadAttr(layerXML, "FilterWidth", filterWidth);
2238  gTools().ReadAttr(layerXML, "StrideRows", strideRows);
2239  gTools().ReadAttr(layerXML, "StrideCols", strideCols);
2240 
2241  fNet->AddMaxPoolLayer(filterHeight, filterWidth, strideRows, strideCols);
2242  }
2243  else if (layerName == "ReshapeLayer") {
2244 
2245  // read reshape layer info
2246  size_t depth, height, width = 0;
2247  gTools().ReadAttr(layerXML, "Depth", depth);
2248  gTools().ReadAttr(layerXML, "Height", height);
2249  gTools().ReadAttr(layerXML, "Width", width);
2250  int flattening = 0;
2251  gTools().ReadAttr(layerXML, "Flattening",flattening );
2252 
2253  fNet->AddReshapeLayer(depth, height, width, flattening);
2254 
2255  }
2256  else if (layerName == "RNNLayer") {
2257 
2258  // read RNN layer info
2259  size_t stateSize,inputSize, timeSteps = 0;
2260  int rememberState= 0;
2261  gTools().ReadAttr(layerXML, "StateSize", stateSize);
2262  gTools().ReadAttr(layerXML, "InputSize", inputSize);
2263  gTools().ReadAttr(layerXML, "TimeSteps", timeSteps);
2264  gTools().ReadAttr(layerXML, "RememberState", rememberState );
2265 
2266  fNet->AddBasicRNNLayer(stateSize, inputSize, timeSteps, rememberState);
2267 
2268  }
2269  // BatchNorm Layer
2270  else if (layerName == "BatchNormLayer") {
2271  // use some dammy value which will be overwrittem in BatchNormLayer::ReadWeightsFromXML
2272  fNet->AddBatchNormLayer(0., 0.0);
2273  }
2274 
2275 
2276  // read eventually weights and biases
2277  fNet->GetLayers().back()->ReadWeightsFromXML(layerXML);
2278 
2279  // read next layer
2280  layerXML = gTools().GetNextChild(layerXML);
2281  }
2282 
2283  fBuildNet = false;
2284  // create now the input and output matrices
2285  //int n1 = batchHeight;
2286  //int n2 = batchWidth;
2287  // treat case where batchHeight is the batchSize in case of first Dense layers (then we need to set to fNet batch size)
2288  //if (fXInput.size() > 0) fXInput.clear();
2289  //fXInput.emplace_back(MatrixImpl_t(n1,n2));
2290  fXInput = ArchitectureImpl_t::CreateTensor(fNet->GetBatchSize(), GetInputDepth(), GetInputHeight(), GetInputWidth() );
2291  if (batchDepth == 1 && GetInputHeight() == 1 && GetInputDepth() == 1)
2292  // make here a ColumnMajor tensor
2293  fXInput = TensorImpl_t( fNet->GetBatchSize(), GetInputWidth(),TMVA::Experimental::MemoryLayout::ColumnMajor );
2294  fXInputBuffer = HostBufferImpl_t( fXInput.GetSize());
2295 
2296  // create pointer to output matrix used for the predictions
2297  fYHat = std::unique_ptr<MatrixImpl_t>(new MatrixImpl_t(fNet->GetBatchSize(), fNet->GetOutputWidth() ) );
2298 
2299 
2300 }
2301 
2302 
2303 ////////////////////////////////////////////////////////////////////////////////
void MethodDL::ReadWeightsFromStream(std::istream & /*istr*/)
{
   // Intentionally empty: MethodDL persists its weights via XML only
   // (see ReadWeightsFromXML); stream-based reading is not supported.
}
2307 
2308 ////////////////////////////////////////////////////////////////////////////////
2309 const Ranking *TMVA::MethodDL::CreateRanking()
2310 {
2311  // TODO
2312  return NULL;
2313 }
2314 
2315 ////////////////////////////////////////////////////////////////////////////////
void MethodDL::GetHelpMessage() const
{
   // Intentionally empty: no method-specific help text has been written yet.
   // TODO
}
2320 
2321 } // namespace TMVA