ROOT 6.30.04 Reference Guide
MethodPyKeras.cxx
1 // @(#)root/tmva/pymva $Id$
2 // Author: Stefan Wunsch, 2016
3 
4 #include <Python.h>
5 #include "TMVA/MethodPyKeras.h"
6 
7 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
8 #include <numpy/arrayobject.h>
9 
10 #include "TMVA/Types.h"
11 #include "TMVA/Config.h"
12 #include "TMVA/ClassifierFactory.h"
13 #include "TMVA/Results.h"
16 #include "TMVA/Tools.h"
17 #include "TMVA/Timer.h"
18 
19 using namespace TMVA;
20 
21 namespace TMVA {
22 namespace Internal {
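// Small RAII helper: acquires the Python GIL on construction and releases it again on
// destruction, so any scope that touches the Python C API (e.g. Init() below) only
// needs to create a PyGILRAII object on the stack.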
23 class PyGILRAII {
24  PyGILState_STATE m_GILState;
25 
26 public:
27  PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
28  ~PyGILRAII() { PyGILState_Release(m_GILState); }
29 };
30 } // namespace Internal
31 } // namespace TMVA
32 
33 REGISTER_METHOD(PyKeras)
34 
35 ClassImp(MethodPyKeras);
36 
37 MethodPyKeras::MethodPyKeras(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption)
38  : PyMethodBase(jobName, Types::kPyKeras, methodTitle, dsi, theOption) {
39  fNumEpochs = 10;
 fNumThreads = 0; // use the backend's default number of threads (mirrors the second constructor)
40  fBatchSize = 100;
41  fVerbose = 1;
42  fContinueTraining = false;
43  fSaveBestOnly = true;
44  fTriesEarlyStopping = -1;
45  fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
46  fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
47  fTensorBoard = ""; // empty string deactivates TensorBoard callback
48 }
49 
50 MethodPyKeras::MethodPyKeras(DataSetInfo &theData, const TString &theWeightFile)
51  : PyMethodBase(Types::kPyKeras, theData, theWeightFile) {
52  fNumEpochs = 10;
53  fNumThreads = 0;
54  fBatchSize = 100;
55  fVerbose = 1;
56  fContinueTraining = false;
57  fSaveBestOnly = true;
58  fTriesEarlyStopping = -1;
59  fLearningRateSchedule = ""; // empty string deactivates learning rate scheduler
60  fFilenameTrainedModel = ""; // empty string sets output model filename to default (in weights/)
61  fTensorBoard = ""; // empty string deactivates TensorBoard callback
62 }
63 
64 MethodPyKeras::~MethodPyKeras() {
65 }
66 
67 Bool_t MethodPyKeras::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t) {
68  if (type == Types::kRegression) return kTRUE;
69  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
70  if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
71  return kFALSE;
72 }
73 
74 ///////////////////////////////////////////////////////////////////////////////
75 
76 void MethodPyKeras::DeclareOptions() {
77  DeclareOptionRef(fFilenameModel, "FilenameModel", "Filename of the initial Keras model");
78  DeclareOptionRef(fFilenameTrainedModel, "FilenameTrainedModel", "Filename of the trained output Keras model");
79  DeclareOptionRef(fBatchSize, "BatchSize", "Training batch size");
80  DeclareOptionRef(fNumEpochs, "NumEpochs", "Number of training epochs");
81  DeclareOptionRef(fNumThreads, "NumThreads", "Number of CPU threads (only for Tensorflow backend)");
82  DeclareOptionRef(fGpuOptions, "GpuOptions", "GPU options for tensorflow, such as allow_growth");
83  DeclareOptionRef(fVerbose, "Verbose", "Keras verbosity during training");
84  DeclareOptionRef(fContinueTraining, "ContinueTraining", "Load weights from previous training");
85  DeclareOptionRef(fSaveBestOnly, "SaveBestOnly", "Store only weights with smallest validation loss");
86  DeclareOptionRef(fTriesEarlyStopping, "TriesEarlyStopping", "Number of epochs with no improvement in validation loss after which training will be stopped. The default or a negative number deactivates this option.");
87  DeclareOptionRef(fLearningRateSchedule, "LearningRateSchedule", "Set new learning rate during training at specific epochs, e.g., \"50,0.01;70,0.005\"");
88  DeclareOptionRef(fTensorBoard, "TensorBoard",
89  "Write a log during training to visualize and monitor the training performance with TensorBoard");
92 
93  DeclareOptionRef(fNumValidationString = "20%", "ValidationSize", "Part of the training data to use for validation. "
94  "Specify as 0.2 or 20% to use a fifth of the data set as validation set. "
95  "Specify as 100 to use exactly 100 events. (Default: 20%)");
96 
97 }
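// The options declared above are consumed through the usual TMVA booking string.
// A minimal booking sketch (illustrative only: `factory`, `dataloader` and the model
// file "model.h5" are assumed to exist and are not defined in this file):
//
//   factory->BookMethod(dataloader, TMVA::Types::kPyKeras, "PyKeras",
//                       "!H:!V:FilenameModel=model.h5:NumEpochs=20:BatchSize=32:"
//                       "SaveBestOnly=True:TriesEarlyStopping=5:ValidationSize=20%");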
98 
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// Validation of the ValidationSize option. Allowed formats are 20%, 0.2 and
102 /// 100 etc.
103 /// - 20% and 0.2 selects 20% of the training set as validation data.
104 /// - 100 selects 100 events as the validation data.
105 ///
106 /// @return number of samples in validation set
107 ///
108 UInt_t TMVA::MethodPyKeras::GetNumValidationSamples()
109 {
110  Int_t nValidationSamples = 0;
111  UInt_t trainingSetSize = GetEventCollection(Types::kTraining).size();
112 
113  // Parsing + Validation
114  // --------------------
115  if (fNumValidationString.EndsWith("%")) {
116  // Relative spec. format 20%
117  TString intValStr = TString(fNumValidationString.Strip(TString::kTrailing, '%'));
118 
119  if (intValStr.IsFloat()) {
120  Double_t valSizeAsDouble = fNumValidationString.Atof() / 100.0;
121  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
122  } else {
123  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString
124  << "\". Expected string like \"20%\" or \"20.0%\"." << Endl;
125  }
126  } else if (fNumValidationString.IsFloat()) {
127  Double_t valSizeAsDouble = fNumValidationString.Atof();
128 
129  if (valSizeAsDouble < 1.0) {
130  // Relative spec. format 0.2
131  nValidationSamples = GetEventCollection(Types::kTraining).size() * valSizeAsDouble;
132  } else {
133  // Absolute spec format 100 or 100.0
134  nValidationSamples = valSizeAsDouble;
135  }
136  } else {
137  Log() << kFATAL << "Cannot parse number \"" << fNumValidationString << "\". Expected string like \"0.2\" or \"100\"."
138  << Endl;
139  }
140 
141  // Value validation
142  // ----------------
143  if (nValidationSamples < 0) {
144  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is negative." << Endl;
145  }
146 
147  if (nValidationSamples == 0) {
148  Log() << kFATAL << "Validation size \"" << fNumValidationString << "\" is zero." << Endl;
149  }
150 
151  if (nValidationSamples >= (Int_t)trainingSetSize) {
152  Log() << kFATAL << "Validation size \"" << fNumValidationString
153  << "\" is larger than or equal in size to training set (size=\"" << trainingSetSize << "\")." << Endl;
154  }
155 
156  return nValidationSamples;
157 }
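// Worked example of the parsing above, assuming a training set of 1000 events
// (the numbers are illustrative):
//   ValidationSize=20%  ->  1000 * 0.20 = 200 validation events
//   ValidationSize=0.2  ->  1000 * 0.2  = 200 validation events
//   ValidationSize=100  ->  100 validation events
// A result of 0, a negative value, or a value >= 1000 aborts with kFATAL.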
158 
159 void MethodPyKeras::ProcessOptions() {
160  // Set default filename for trained model if option is not used
161  if (fFilenameTrainedModel.IsNull()) {
162  fFilenameTrainedModel = GetWeightFileDir() + "/TrainedModel_" + GetName() + ".h5";
163  }
164 
165  // set here some specific options for Tensorflow backend
166  // - when using tensorflow gpu set option to allow memory growth to avoid allocating all memory
167  // - set up number of threads for CPU if NumThreads option was specified
168 
169  // check first if using tensorflow backend
170  if (GetKerasBackend() == kTensorFlow) {
171  Log() << kINFO << "Using TensorFlow backend - setting special configuration options " << Endl;
172  PyRunString("import tensorflow as tf");
173  PyRunString("from keras.backend import tensorflow_backend as K");
174 
175  // check tensorflow version
176  PyRunString("tf_major_version = int(tf.__version__.split('.')[0])");
177  //PyRunString("print(tf.__version__,'major is ',tf_major_version)");
178  PyObject *pyTfVersion = PyDict_GetItemString(fLocalNS, "tf_major_version");
179  int tfVersion = PyLong_AsLong(pyTfVersion);
180  Log() << kINFO << "Using Tensorflow version " << tfVersion << Endl;
181 
182  // use different naming in tf2 for ConfigProto and Session
183  TString configProto = (tfVersion >= 2) ? "tf.compat.v1.ConfigProto" : "tf.ConfigProto";
184  TString session = (tfVersion >= 2) ? "tf.compat.v1.Session" : "tf.Session";
185 
186  // in case specify number of threads
187  int num_threads = fNumThreads;
188  if (num_threads > 0) {
189  Log() << kINFO << "Setting the CPU number of threads = " << num_threads << Endl;
190 
191  PyRunString(TString::Format("session_conf = %s(intra_op_parallelism_threads=%d,inter_op_parallelism_threads=%d)",
192  configProto.Data(), num_threads,num_threads));
193  }
194  else
195  PyRunString(TString::Format("session_conf = %s()",configProto.Data()));
196 
197  // applying GPU options such as allow_growth=True to avoid allocating all memory on GPU
198  // that prevents running later TMVA-GPU
199  // Also new Nvidia RTX cards (e.g. RTX 2070) require this option
200  if (!fGpuOptions.IsNull() ) {
201  TObjArray * optlist = fGpuOptions.Tokenize(",");
202  for (int item = 0; item < optlist->GetEntries(); ++item) {
203  Log() << kINFO << "Applying GPU option: gpu_options." << optlist->At(item)->GetName() << Endl;
204  PyRunString(TString::Format("session_conf.gpu_options.%s", optlist->At(item)->GetName()));
205  }
206  }
207  PyRunString(TString::Format("sess = %s(config=session_conf)", session.Data()));
208 
209  if (tfVersion < 2) {
210  PyRunString("K.set_session(sess)");
211  } else {
212  PyRunString("tf.compat.v1.keras.backend.set_session(sess)");
213  }
214  }
215  else {
216  if (fNumThreads > 0)
217  Log() << kWARNING << "Cannot set the given " << fNumThreads << " threads when not using tensorflow as backend" << Endl;
218  if (!fGpuOptions.IsNull() ) {
219  Log() << kWARNING << "Cannot set the given GPU option " << fGpuOptions << " when not using tensorflow as backend" << Endl;
220  }
221  }
222 
223  // Setup model, either the initial model from `fFilenameModel` or
224  // the trained model from `fFilenameTrainedModel`
225  if (fContinueTraining) Log() << kINFO << "Continue training with trained model" << Endl;
226  SetupKerasModel(fContinueTraining);
227 }
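// For example, with NumThreads=4 and GpuOptions="allow_growth=True" on a TF1-style
// backend, the strings assembled above expand to (illustrative values):
//
//   session_conf = tf.ConfigProto(intra_op_parallelism_threads=4,inter_op_parallelism_threads=4)
//   session_conf.gpu_options.allow_growth=True
//   sess = tf.Session(config=session_conf)
//   K.set_session(sess)
//
// With TF2 the tf.compat.v1.ConfigProto / tf.compat.v1.Session names are used instead,
// and the session is registered via tf.compat.v1.keras.backend.set_session(sess).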
228 
229 void MethodPyKeras::SetupKerasModel(bool loadTrainedModel) {
230  /*
231  * Load Keras model from file
232  */
233 
234  // Load initial model or already trained model
235  TString filenameLoadModel;
236  if (loadTrainedModel) {
237  filenameLoadModel = fFilenameTrainedModel;
238  }
239  else {
240  filenameLoadModel = fFilenameModel;
241  }
242  PyRunString("model = keras.models.load_model('"+filenameLoadModel+"')",
243  "Failed to load Keras model from file: "+filenameLoadModel);
244  Log() << kINFO << "Load model from file: " << filenameLoadModel << Endl;
245 
246 
247  /*
248  * Init variables and weights
249  */
250 
251  // Get variables, classes and target numbers
252  fNVars = GetNVariables();
253  if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) fNOutputs = DataInfo().GetNClasses();
254  else if (GetAnalysisType() == Types::kRegression) fNOutputs = DataInfo().GetNTargets();
255  else Log() << kFATAL << "Selected analysis type is not implemented" << Endl;
256 
257  // Init evaluation (needed for getMvaValue)
258  fVals = new float[fNVars]; // holds values used for classification and regression
259  npy_intp dimsVals[2] = {(npy_intp)1, (npy_intp)fNVars};
260  PyArrayObject* pVals = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsVals, NPY_FLOAT, (void*)fVals);
261  PyDict_SetItemString(fLocalNS, "vals", (PyObject*)pVals);
262 
263  fOutput.resize(fNOutputs); // holds classification probabilities or regression output
264  npy_intp dimsOutput[2] = {(npy_intp)1, (npy_intp)fNOutputs};
265  PyArrayObject* pOutput = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsOutput, NPY_FLOAT, (void*)&fOutput[0]);
266  PyDict_SetItemString(fLocalNS, "output", (PyObject*)pOutput);
267 
268  // Mark the model as setup
269  fModelIsSetup = true;
270 }
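// Note on the arrays created above: PyArray_SimpleNewFromData wraps the existing C++
// buffers (fVals and fOutput) without copying, so filling fVals on the C++ side and
// letting the Python side write into "output" in place is what allows the per-event
// evaluation in GetMvaValue(), GetRegressionValues() and GetMulticlassValues().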
271 
272 void MethodPyKeras::Init() {
273 
274  TMVA::Internal::PyGILRAII raii;
275 
276  if (!PyIsInitialized()) {
277  Log() << kFATAL << "Python is not initialized" << Endl;
278  }
279  _import_array(); // required to use numpy arrays
280 
281  // Import Keras
282  // NOTE: sys.argv has to be cleared because otherwise TensorFlow breaks
283  PyRunString("import sys; sys.argv = ['']", "Set sys.argv failed");
284  PyRunString("import keras", "Import Keras failed");
285 
286  // Set flag that model is not setup
287  fModelIsSetup = false;
288 }
289 
290 void MethodPyKeras::Train() {
291  if(!fModelIsSetup) Log() << kFATAL << "Model is not setup for training" << Endl;
292 
293  /*
294  * Load training data to numpy array
295  */
296 
297  UInt_t nAllEvents = Data()->GetNTrainingEvents();
298  UInt_t nValEvents = GetNumValidationSamples();
299  UInt_t nTrainingEvents = nAllEvents - nValEvents;
300 
301  Log() << kINFO << "Split TMVA training data into " << nTrainingEvents << " training events and "
302  << nValEvents << " validation events" << Endl;
303 
304  float* trainDataX = new float[nTrainingEvents*fNVars];
305  float* trainDataY = new float[nTrainingEvents*fNOutputs];
306  float* trainDataWeights = new float[nTrainingEvents];
307  for (UInt_t i=0; i<nTrainingEvents; i++) {
308  const TMVA::Event* e = GetTrainingEvent(i);
309  // Fill variables
310  for (UInt_t j=0; j<fNVars; j++) {
311  trainDataX[j + i*fNVars] = e->GetValue(j);
312  }
313  // Fill targets
314  // NOTE: For classification, convert class number in one-hot vector,
315  // e.g., 1 -> [0, 1] or 0 -> [1, 0] for binary classification
316  if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
317  for (UInt_t j=0; j<fNOutputs; j++) {
318  trainDataY[j + i*fNOutputs] = 0;
319  }
320  trainDataY[e->GetClass() + i*fNOutputs] = 1;
321  }
322  else if (GetAnalysisType() == Types::kRegression) {
323  for (UInt_t j=0; j<fNOutputs; j++) {
324  trainDataY[j + i*fNOutputs] = e->GetTarget(j);
325  }
326  }
327  else Log() << kFATAL << "Cannot fill target vector because analysis type is not known" << Endl;
328  // Fill weights
329  // NOTE: If no weight branch is given, this defaults to ones for all events
330  trainDataWeights[i] = e->GetWeight();
331  }
332 
333  npy_intp dimsTrainX[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNVars};
334  npy_intp dimsTrainY[2] = {(npy_intp)nTrainingEvents, (npy_intp)fNOutputs};
335  npy_intp dimsTrainWeights[1] = {(npy_intp)nTrainingEvents};
336  PyArrayObject* pTrainDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainX, NPY_FLOAT, (void*)trainDataX);
337  PyArrayObject* pTrainDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsTrainY, NPY_FLOAT, (void*)trainDataY);
338  PyArrayObject* pTrainDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsTrainWeights, NPY_FLOAT, (void*)trainDataWeights);
339  PyDict_SetItemString(fLocalNS, "trainX", (PyObject*)pTrainDataX);
340  PyDict_SetItemString(fLocalNS, "trainY", (PyObject*)pTrainDataY);
341  PyDict_SetItemString(fLocalNS, "trainWeights", (PyObject*)pTrainDataWeights);
342 
343  /*
344  * Load validation data to numpy array
345  */
346 
347  // NOTE: TMVA provides the validation data as a subset of the training data;
348  // the test data are not used for validation, they are reserved for the final testing
349 
350 
351  float* valDataX = new float[nValEvents*fNVars];
352  float* valDataY = new float[nValEvents*fNOutputs];
353  float* valDataWeights = new float[nValEvents];
354  // validation events follow the training ones in the TMVA training vector
355  for (UInt_t i=0; i< nValEvents ; i++) {
356  UInt_t ievt = nTrainingEvents + i; // TMVA event index
357  const TMVA::Event* e = GetTrainingEvent(ievt);
358  // Fill variables
359  for (UInt_t j=0; j<fNVars; j++) {
360  valDataX[j + i*fNVars] = e->GetValue(j);
361  }
362  // Fill targets
363  if (GetAnalysisType() == Types::kClassification || GetAnalysisType() == Types::kMulticlass) {
364  for (UInt_t j=0; j<fNOutputs; j++) {
365  valDataY[j + i*fNOutputs] = 0;
366  }
367  valDataY[e->GetClass() + i*fNOutputs] = 1;
368  }
369  else if (GetAnalysisType() == Types::kRegression) {
370  for (UInt_t j=0; j<fNOutputs; j++) {
371  valDataY[j + i*fNOutputs] = e->GetTarget(j);
372  }
373  }
374  else Log() << kFATAL << "Cannot fill target vector because analysis type is not known" << Endl;
375  // Fill weights
376  valDataWeights[i] = e->GetWeight();
377  }
378 
379  npy_intp dimsValX[2] = {(npy_intp)nValEvents, (npy_intp)fNVars};
380  npy_intp dimsValY[2] = {(npy_intp)nValEvents, (npy_intp)fNOutputs};
381  npy_intp dimsValWeights[1] = {(npy_intp)nValEvents};
382  PyArrayObject* pValDataX = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValX, NPY_FLOAT, (void*)valDataX);
383  PyArrayObject* pValDataY = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsValY, NPY_FLOAT, (void*)valDataY);
384  PyArrayObject* pValDataWeights = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsValWeights, NPY_FLOAT, (void*)valDataWeights);
385  PyDict_SetItemString(fLocalNS, "valX", (PyObject*)pValDataX);
386  PyDict_SetItemString(fLocalNS, "valY", (PyObject*)pValDataY);
387  PyDict_SetItemString(fLocalNS, "valWeights", (PyObject*)pValDataWeights);
388 
389  /*
390  * Train Keras model
391  */
392  Log() << kINFO << "Training Model Summary" << Endl;
393  PyRunString("model.summary()");
394 
395  // Setup parameters
396 
397  PyObject* pBatchSize = PyLong_FromLong(fBatchSize);
398  PyObject* pNumEpochs = PyLong_FromLong(fNumEpochs);
399  PyObject* pVerbose = PyLong_FromLong(fVerbose);
400  PyDict_SetItemString(fLocalNS, "batchSize", pBatchSize);
401  PyDict_SetItemString(fLocalNS, "numEpochs", pNumEpochs);
402  PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
403 
404  // Setup training callbacks
405  PyRunString("callbacks = []");
406 
407  // Callback: Save only weights with smallest validation loss
408  if (fSaveBestOnly) {
409  PyRunString("callbacks.append(keras.callbacks.ModelCheckpoint('"+fFilenameTrainedModel+"', monitor='val_loss', verbose=verbose, save_best_only=True, mode='auto'))", "Failed to setup training callback: SaveBestOnly");
410  Log() << kINFO << "Option SaveBestOnly: Only model weights with smallest validation loss will be stored" << Endl;
411  }
412 
413  // Callback: Stop training early if no improvement in validation loss is observed
414  if (fTriesEarlyStopping>=0) {
415  TString tries;
416  tries.Form("%i", fTriesEarlyStopping);
417  PyRunString("callbacks.append(keras.callbacks.EarlyStopping(monitor='val_loss', patience="+tries+", verbose=verbose, mode='auto'))", "Failed to setup training callback: TriesEarlyStopping");
418  Log() << kINFO << "Option TriesEarlyStopping: Training will stop after " << tries << " epochs with no improvement of the validation loss" << Endl;
419  }
420 
421  // Callback: Learning rate scheduler
422  if (fLearningRateSchedule!="") {
423  // Setup a python dictionary with the desired learning rate steps
424  PyRunString("strScheduleSteps = '"+fLearningRateSchedule+"'\n"
425  "schedulerSteps = {}\n"
426  "for c in strScheduleSteps.split(';'):\n"
427  " x = c.split(',')\n"
428  " schedulerSteps[int(x[0])] = float(x[1])\n",
429  "Failed to setup steps for scheduler function from string: "+fLearningRateSchedule,
430  Py_file_input);
431  // Set scheduler function as piecewise function with given steps
432  PyRunString("def schedule(epoch, model=model, schedulerSteps=schedulerSteps):\n"
433  " if epoch in schedulerSteps: return float(schedulerSteps[epoch])\n"
434  " else: return float(model.optimizer.lr.get_value())\n",
435  "Failed to setup scheduler function with string: "+fLearningRateSchedule,
436  Py_file_input);
437  // Setup callback
438  PyRunString("callbacks.append(keras.callbacks.LearningRateScheduler(schedule))",
439  "Failed to setup training callback: LearningRateSchedule");
440  Log() << kINFO << "Option LearningRateSchedule: Set learning rate during training: " << fLearningRateSchedule << Endl;
441  }
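 // Example of the scheduler setup above: the option string "50,0.01;70,0.005" becomes
 // schedulerSteps = {50: 0.01, 70: 0.005} in the Python namespace, i.e. the learning
 // rate is set to 0.01 at epoch 50 and to 0.005 at epoch 70, while all other epochs
 // keep the optimizer's current value.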
442 
443  // Callback: TensorBoard
444  if (fTensorBoard != "") {
445  TString logdir = TString("'") + fTensorBoard + TString("'");
446  PyRunString(
447  "callbacks.append(keras.callbacks.TensorBoard(log_dir=" + logdir +
448  ", histogram_freq=0, batch_size=batchSize, write_graph=True, write_grads=False, write_images=False))",
449  "Failed to setup training callback: TensorBoard");
450  Log() << kINFO << "Option TensorBoard: Log files for training monitoring are stored in: " << logdir << Endl;
451  }
452 
453  // Train model
454  PyRunString("history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)",
455  "Failed to train model");
456 
457 
458  std::vector<float> fHistory; // Hold training history (val_acc or loss etc)
459  fHistory.resize(fNumEpochs); // holds training loss or accuracy output
460  npy_intp dimsHistory[1] = { (npy_intp)fNumEpochs};
461  PyArrayObject* pHistory = (PyArrayObject*)PyArray_SimpleNewFromData(1, dimsHistory, NPY_FLOAT, (void*)&fHistory[0]);
462  PyDict_SetItemString(fLocalNS, "HistoryOutput", (PyObject*)pHistory);
463 
464  // Store training history data
465  Int_t iHis=0;
466  PyRunString("number_of_keys=len(history.history.keys())");
467  PyObject* PyNkeys=PyDict_GetItemString(fLocalNS, "number_of_keys");
468  int nkeys=PyLong_AsLong(PyNkeys);
469  for (iHis=0; iHis<nkeys; iHis++) {
470 
471  PyRunString(TString::Format("copy_string=str(list(history.history.keys())[%d])",iHis));
472  //PyRunString("print (copy_string)");
473  PyObject* stra=PyDict_GetItemString(fLocalNS, "copy_string");
474  if(!stra) break;
475 #if PY_MAJOR_VERSION < 3 // for Python2
476  const char *stra_name = PyBytes_AsString(stra);
477  // need to add string delimiter for Python2
478  TString sname = TString::Format("'%s'",stra_name);
479  const char * name = sname.Data();
480 #else // for Python3
481  PyObject* repr = PyObject_Repr(stra);
482  PyObject* str = PyUnicode_AsEncodedString(repr, "utf-8", "~E~");
483  const char *name = PyBytes_AsString(str);
484 #endif
485 
486  Log() << kINFO << "Getting training history for item:" << iHis << " name = " << name << Endl;
487  PyRunString(TString::Format("for i,p in enumerate(history.history[%s]):\n HistoryOutput[i]=p\n",name),
488  TString::Format("Failed to get %s from training history",name));
489  for (size_t i=0; i<fHistory.size(); i++)
490  fTrainHistory.AddValue(name,i+1,fHistory[i]);
491 
492  }
494 
495  /*
496  * Store trained model to file (only if option 'SaveBestOnly' is NOT activated,
497  * because we do not want to override the best model checkpoint)
498  */
499 
500  if (!fSaveBestOnly) {
501  PyRunString("model.save('"+fFilenameTrainedModel+"', overwrite=True)",
502  "Failed to save trained model: "+fFilenameTrainedModel);
503  Log() << kINFO << "Trained model written to file: " << fFilenameTrainedModel << Endl;
504  }
505 
506  /*
507  * Clean-up
508  */
509 
510  delete[] trainDataX;
511  delete[] trainDataY;
512  delete[] trainDataWeights;
513  delete[] valDataX;
514  delete[] valDataY;
515  delete[] valDataWeights;
516 }
517 
518 void MethodPyKeras::TestClassification() {
519  MethodBase::TestClassification();
520 }
521 
522 Double_t MethodPyKeras::GetMvaValue(Double_t *errLower, Double_t *errUpper) {
523  // Cannot determine error
524  NoErrorCalc(errLower, errUpper);
525 
526  // Check whether the model is setup
527  // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
528  if (!fModelIsSetup) {
529  // Setup the trained model
530  SetupKerasModel(true);
531  }
532 
533  // Get signal probability (called mvaValue here)
534  const TMVA::Event* e = GetEvent();
535  for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
536  PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
537  "Failed to get predictions");
538 
539  return fOutput[TMVA::Types::kSignal];
540 }
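// Application-side sketch: at evaluation time the trained model is reloaded via
// SetupKerasModel(true) and queried event by event. A minimal TMVA::Reader usage
// (illustrative; the variable name "var1" and the weight-file path are assumptions):
//
//   TMVA::Reader reader("!Color:!Silent");
//   Float_t var1; reader.AddVariable("var1", &var1);
//   reader.BookMVA("PyKeras", "dataset/weights/TMVAClassification_PyKeras.weights.xml");
//   var1 = /* value from the current event */ 0.f;
//   Double_t mva = reader.EvaluateMVA("PyKeras"); // ends up calling GetMvaValue()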
541 
542 std::vector<Double_t> MethodPyKeras::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress) {
543  // Check whether the model is setup
544  // NOTE: Unfortunately this is needed because during evaluation ProcessOptions is not called again
545  if (!fModelIsSetup) {
546  // Setup the trained model
547  SetupKerasModel(true);
548  }
549 
550  // Load data to numpy array
551  Long64_t nEvents = Data()->GetNEvents();
552  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
553  if (firstEvt < 0) firstEvt = 0;
554  nEvents = lastEvt-firstEvt;
555 
556  // use timer
557  Timer timer( nEvents, GetName(), kTRUE );
558 
559  if (logProgress)
560  Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
561  << "Evaluation of " << GetMethodName() << " on "
562  << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
563  << " sample (" << nEvents << " events)" << Endl;
564 
565  float* data = new float[nEvents*fNVars];
566  for (UInt_t i=0; i<nEvents; i++) {
567  Data()->SetCurrentEvent(i);
568  const TMVA::Event *e = GetEvent();
569  for (UInt_t j=0; j<fNVars; j++) {
570  data[j + i*fNVars] = e->GetValue(j);
571  }
572  }
573 
574  npy_intp dimsData[2] = {(npy_intp)nEvents, (npy_intp)fNVars};
575  PyArrayObject* pDataMvaValues = (PyArrayObject*)PyArray_SimpleNewFromData(2, dimsData, NPY_FLOAT, (void*)data);
576  if (pDataMvaValues==0) Log() << "Failed to load data to Python array" << Endl;
577 
578  // Get prediction for all events
579  PyObject* pModel = PyDict_GetItemString(fLocalNS, "model");
580  if (pModel==0) Log() << kFATAL << "Failed to get model Python object" << Endl;
581  PyArrayObject* pPredictions = (PyArrayObject*) PyObject_CallMethod(pModel, (char*)"predict", (char*)"O", pDataMvaValues);
582  if (pPredictions==0) Log() << kFATAL << "Failed to get predictions" << Endl;
583  delete[] data;
584 
585  // Load predictions to double vector
586  // NOTE: The signal probability is given at the output
587  std::vector<double> mvaValues(nEvents);
588  float* predictionsData = (float*) PyArray_DATA(pPredictions);
589  for (UInt_t i=0; i<nEvents; i++) {
590  mvaValues[i] = (double) predictionsData[i*fNOutputs + TMVA::Types::kSignal];
591  }
592 
593  if (logProgress) {
594  Log() << kINFO
595  << "Elapsed time for evaluation of " << nEvents << " events: "
596  << timer.GetElapsedTime() << " " << Endl;
597  }
598 
599 
600  return mvaValues;
601 }
602 
603 std::vector<Float_t>& MethodPyKeras::GetRegressionValues() {
604  // Check whether the model is setup
605  // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
606  if (!fModelIsSetup){
607  // Setup the model and load weights
608  SetupKerasModel(true);
609  }
610 
611  // Get regression values
612  const TMVA::Event* e = GetEvent();
613  for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
614  PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
615  "Failed to get predictions");
616 
617  // Use inverse transformation of targets to get final regression values
618  Event * eTrans = new Event(*e);
619  for (UInt_t i=0; i<fNOutputs; ++i) {
620  eTrans->SetTarget(i,fOutput[i]);
621  }
622 
623  const Event* eTrans2 = GetTransformationHandler().InverseTransform(eTrans);
624  for (UInt_t i=0; i<fNOutputs; ++i) {
625  fOutput[i] = eTrans2->GetTarget(i);
626  }
627 
628  return fOutput;
629 }
630 
631 std::vector<Float_t>& MethodPyKeras::GetMulticlassValues() {
632  // Check whether the model is setup
633  // NOTE: unfortunately this is needed because during evaluation ProcessOptions is not called again
634  if (!fModelIsSetup){
635  // Setup the model and load weights
636  SetupKerasModel(true);
637  }
638 
639  // Get class probabilites
640  const TMVA::Event* e = GetEvent();
641  for (UInt_t i=0; i<fNVars; i++) fVals[i] = e->GetValue(i);
642  PyRunString("for i,p in enumerate(model.predict(vals)): output[i]=p\n",
643  "Failed to get predictions");
644 
645  return fOutput;
646 }
647 
648 void MethodPyKeras::ReadModelFromFile() {
649 }
650 
651 void MethodPyKeras::GetHelpMessage() const {
652 // typical length of text line:
653 // "|--------------------------------------------------------------|"
654  Log() << Endl;
655  Log() << "Keras is a high-level API for the Theano and Tensorflow packages." << Endl;
656  Log() << "This method wraps the training and prediction steps of the Keras" << Endl;
657  Log() << "Python package for TMVA, so that data loading, preprocessing and" << Endl;
658  Log() << "evaluation can be done within the TMVA system. To use this Keras" << Endl;
659  Log() << "interface, you have to generate a model with Keras first. Then," << Endl;
660  Log() << "this model can be loaded and trained in TMVA." << Endl;
661  Log() << Endl;
662 }
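// A minimal model-generation sketch in Python (illustrative; the layer sizes, the file
// name "model.h5" and the two-class softmax output are assumptions, not fixed by this
// interface -- only the final save step and matching input/output shapes are required):
//
//   import keras
//   model = keras.models.Sequential()
//   model.add(keras.layers.Dense(64, activation='relu', input_dim=nVariables))
//   model.add(keras.layers.Dense(2, activation='softmax'))
//   model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
//   model.save('model.h5')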
663 
664 MethodPyKeras::EBackendType MethodPyKeras::GetKerasBackend() {
665  // get the keras backend
666  // check first if using tensorflow backend
667  PyRunString("keras_backend_is_set = keras.backend.backend() == \"tensorflow\"");
668  PyObject * keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
669  if (keras_backend != nullptr && keras_backend == Py_True)
670  return kTensorFlow;
671 
672  PyRunString("keras_backend_is_set = keras.backend.backend() == \"theano\"");
673  keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
674  if (keras_backend != nullptr && keras_backend == Py_True)
675  return kTheano;
676 
677  PyRunString("keras_backend_is_set = keras.backend.backend() == \"cntk\"");
678  keras_backend = PyDict_GetItemString(fLocalNS,"keras_backend_is_set");
679  if (keras_backend != nullptr && keras_backend == Py_True)
680  return kCNTK;
681 
682  return kUndefined;
683 }
684 
685 TString MethodPyKeras::GetKerasBackendName() {
686  // get the keras backend name
687  EBackendType type = GetKerasBackend();
688  if (type == kTensorFlow) return "TensorFlow";
689  if (type == kTheano) return "Theano";
690  if (type == kCNTK) return "CNTK";
691  return "Undefined";
692 }