MethodPyGTB.cxx
// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyGTB                                                           *
 * Web    : http://oproject.org                                                   *
 *                                                                                *
 * Description:                                                                   *
 *      GradientBoostingClassifier classifier from scikit-learn                   *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 *                                                                                *
 **********************************************************************************/

#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE
#include "TMVA/MethodPyGTB.h"

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>

using namespace TMVA;

namespace TMVA {
namespace Internal {
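// RAII guard for the Python Global Interpreter Lock: the constructor acquires
// the GIL via PyGILState_Ensure() and the destructor releases it, so every
// Python C-API call made while the guard is in scope is thread-safe.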
class PyGILRAII {
   PyGILState_STATE m_GILState;

public:
   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
   ~PyGILRAII() { PyGILState_Release(m_GILState); }
};
} // namespace Internal
} // namespace TMVA

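// REGISTER_METHOD makes PyGTB known to TMVA's ClassifierFactory so it can be
// booked by enum or name. A minimal booking sketch (the factory/dataloader
// variables and the option string below are illustrative, not part of this
// file):
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyGTB, "PyGTB",
//                        "NEstimators=100:LearningRate=0.1:MaxDepth=3");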
REGISTER_METHOD(PyGTB)

ClassImp(MethodPyGTB);

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(const TString &jobName,
                         const TString &methodTitle,
                         DataSetInfo &dsi,
                         const TString &theOption) :
   PyMethodBase(jobName, Types::kPyGTB, methodTitle, dsi, theOption),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}

//_______________________________________________________________________
MethodPyGTB::MethodPyGTB(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyGTB, theData, theWeightFile),
   fLoss("deviance"),
   fLearningRate(0.1),
   fNestimators(100),
   fSubsample(1.0),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0.0),
   fMaxDepth(3),
   fInit("None"),
   fRandomState("None"),
   fMaxFeatures("None"),
   fVerbose(0),
   fMaxLeafNodes("None"),
   fWarmStart(kFALSE)
{
}


//_______________________________________________________________________
MethodPyGTB::~MethodPyGTB(void)
{
}

//_______________________________________________________________________
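// PyGTB handles two-class classification and multiclass classification with
// two or more classes; regression is not supported.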
Bool_t MethodPyGTB::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}


//_______________________________________________________________________
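// Declare the tunable options of the method. The help strings below mirror
// the scikit-learn documentation of GradientBoostingClassifier, whose keyword
// arguments these options are forwarded to.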
void MethodPyGTB::DeclareOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef(fLoss, "Loss", "{'deviance', 'exponential'}, optional (default='deviance')\
   loss function to be optimized. 'deviance' refers to\
   deviance (= logistic regression) for classification\
   with probabilistic outputs. For loss 'exponential' gradient\
   boosting recovers the AdaBoost algorithm.");

   DeclareOptionRef(fLearningRate, "LearningRate", "float, optional (default=0.1)\
   learning rate shrinks the contribution of each tree by `learning_rate`.\
   There is a trade-off between learning_rate and n_estimators.");

   DeclareOptionRef(fNestimators, "NEstimators", "int (default=100)\
   The number of boosting stages to perform. Gradient boosting\
   is fairly robust to over-fitting so a large number usually\
   results in better performance.");

   DeclareOptionRef(fSubsample, "Subsample", "float, optional (default=1.0)\
   The fraction of samples to be used for fitting the individual base\
   learners. If smaller than 1.0 this results in Stochastic Gradient\
   Boosting. `subsample` interacts with the parameter `n_estimators`.\
   Choosing `subsample < 1.0` leads to a reduction of variance\
   and an increase in bias.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2)\
   The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
   The minimum number of samples in newly created leaves. A split is \
   discarded if after the split, one of the leaves would contain less than \
   ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.) \
   The minimum weighted fraction of the input samples required to be at a \
   leaf node.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer, optional (default=3) \
   The maximum depth of the individual regression estimators. The maximum \
   depth limits the number of nodes in the tree. \
   Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fInit, "Init", "BaseEstimator, None, optional (default=None)\
   An estimator object that is used to compute the initial\
   predictions. ``init`` has to provide ``fit`` and ``predict``.\
   If None it uses ``loss.init_estimator``.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None)\
   If int, random_state is the seed used by the random number generator;\
   If RandomState instance, random_state is the random number generator;\
   If None, the random number generator is the RandomState instance used\
   by `np.random`.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0)\
   Controls the verbosity of the tree building process.");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None)\
   Grow trees with ``max_leaf_nodes`` in best-first fashion.\
   Best nodes are defined as relative reduction in impurity.\
   If None then unlimited number of leaf nodes.\
   If not None then ``max_depth`` will be ignored.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False)\
   When set to ``True``, reuse the solution of the previous call to fit\
   and add more estimators to the ensemble, otherwise, just fit a whole\
   new forest.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}

//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyGTB::ProcessOptions()
{
   if (fLoss != "deviance" && fLoss != "exponential") {
      Log() << kFATAL << Form("Loss = %s ... that does not work!", fLoss.Data())
            << " The options are 'deviance' or 'exponential'." << Endl;
   }
   pLoss = Eval(Form("'%s'", fLoss.Data()));
   PyDict_SetItemString(fLocalNS, "loss", pLoss);

   if (fLearningRate <= 0) {
      Log() << kFATAL << "LearningRate <= 0 ... that does not work!" << Endl;
   }
   pLearningRate = Eval(Form("%f", fLearningRate));
   PyDict_SetItemString(fLocalNS, "learningRate", pLearningRate);

   if (fNestimators <= 0) {
      Log() << kFATAL << "NEstimators <= 0 ... that does not work!" << Endl;
   }
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << "MinSamplesSplit < 0 ... that does not work!" << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fSubsample < 0) {
      Log() << kFATAL << "Subsample < 0 ... that does not work!" << Endl;
   }
   pSubsample = Eval(Form("%f", fSubsample));
   PyDict_SetItemString(fLocalNS, "subsample", pSubsample);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << "MinSamplesLeaf < 0 ... that does not work!" << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kFATAL << "MinWeightFractionLeaf < 0 ... that does not work!" << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxDepth <= 0) {
      Log() << kFATAL << "MaxDepth <= 0 ... that does not work!" << Endl;
   }
   pMaxDepth = Eval(Form("%i", fMaxDepth));
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);

   pInit = Eval(fInit);
   if (!pInit) {
      Log() << kFATAL << Form("Init = %s ... that does not work!", fInit.Data())
            << " The options are None or BaseEstimator, an estimator object that"
            << " is used to compute the initial predictions."
            << " 'init' has to provide 'fit' and 'predict' methods."
            << " If None it uses 'loss.init_estimator'." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "init", pInit);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form("RandomState = %s ... that does not work!", fRandomState.Data())
            << " If int, random_state is the seed used by the random number generator;"
            << " If RandomState instance, random_state is the random number generator;"
            << " If None, the random number generator is the RandomState instance used by 'np.random'."
            << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   if (!pMaxFeatures) {
      Log() << kFATAL << Form("MaxFeatures = %s ... that does not work!", fMaxFeatures.Data())
            << " int, float, string or None, optional (default='auto')."
            << " The number of features to consider when looking for the best split:"
            << " If int, then consider `max_features` features at each split."
            << " If float, then `max_features` is a percentage and"
            << " `int(max_features * n_features)` features are considered at each split."
            << " If 'auto', then `max_features=sqrt(n_features)`."
            << " If 'sqrt', then `max_features=sqrt(n_features)`."
            << " If 'log2', then `max_features=log2(n_features)`."
            << " If None, then `max_features=n_features`." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form("MaxLeafNodes = %s ... that does not work!", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);

   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull()) {
      fFilenameClassifier = GetWeightFileDir() + "/PyGTBModel_" + GetName() + ".PyData";
   }
}

//_______________________________________________________________________
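// One-time setup: acquire the GIL, initialize the numpy C API, validate the
// options, and import the scikit-learn module that provides the classifier.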
void MethodPyGTB::Init()
{
   TMVA::Internal::PyGILRAII raii;
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for gradient tree boosting classifier
   PyRunString("import sklearn.ensemble");

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyGTB::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   PyArrayObject *fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject *)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));
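   // TrainData is the raw buffer of the (fNrowsTraining x fNvars) numpy
   // array; PyArray_SimpleNew allocates in C (row-major) order, so the value
   // of variable j for event i is stored at TrainData[i * fNvars + j].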

   npy_intp dimsClasses = (npy_intp) fNrowsTraining;
   PyArrayObject *fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject *)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   PyArrayObject *fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject *)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }

   // Create classifier object
   PyRunString("classifier = sklearn.ensemble.GradientBoostingClassifier(loss=loss, learning_rate=learningRate, n_estimators=nEstimators, max_depth=maxDepth, min_samples_split=minSamplesSplit, min_samples_leaf=minSamplesLeaf, min_weight_fraction_leaf=minWeightFractionLeaf, subsample=subsample, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, init=init, verbose=verbose, warm_start=warmStart, random_state=randomState)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from GradientBoostingClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}

//_______________________________________________________________________
void MethodPyGTB::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyGTB::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
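   // predict_proba returns an (nEvents x fNoutputs) array in row-major
   // order, so proba[i * fNoutputs + c] is the probability that event i
   // belongs to class c; the signal column is TMVA::Types::kSignal.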

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs * i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << Endl;
   }

   return mvaValues;
}

//_______________________________________________________________________
Double_t MethodPyGTB::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
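   // For a single event the result is a (1 x fNoutputs) row, so indexing
   // proba by the class id directly yields the per-class probability.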

   // Return MVA value
   Double_t mvaValue = proba[TMVA::Types::kSignal]; // getting signal probability

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t>& MethodPyGTB::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
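   // Here all fNoutputs class probabilities of the single current event are
   // copied out, one entry per class as defined in the DataSetInfo.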

   // Return MVA values
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyGTB::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0) {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
const Ranking* MethodPyGTB::CreateRanking()
{
   // Get the feature importances from the classifier as an array whose
   // length equals the number of variables; a higher value signals a
   // higher importance.
   PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
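   // The cast below relies on feature_importances_ being a contiguous
   // float64 (double) numpy array, as scikit-learn provides.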
   Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}

//_______________________________________________________________________
void MethodPyGTB::GetHelpMessage() const
{
   // typical length of text line:
   //         "|--------------------------------------------------------------|"
   Log() << "A gradient tree boosting classifier builds a model from an ensemble" << Endl;
   Log() << "of decision trees, which are adapted in each boosting step to better" << Endl;
   Log() << "fit the previously misclassified events." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}