// @(#)root/tmva/pymva $Id$
// Authors: Omar Zapata, Lorenzo Moneta, Sergei Gleyzer 2015

/**********************************************************************************
 * Project: TMVA - a ROOT-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodPyRandomForest                                                  *
 * Web    : http://oproject.org                                                   *
 *                                                                                *
 * Description:                                                                   *
 *      Random Forest classifier from scikit-learn                                *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                         *
 *                                                                                *
 **********************************************************************************/
#include <Python.h> // Needs to be included first to avoid redefinition of _POSIX_C_SOURCE

#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>

#include "TMVA/Configurable.h"
#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/DataSet.h"
#include "TMVA/Event.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/Results.h"
#include "TMVA/Tools.h"
#include "TMVA/Types.h"
#include "TMVA/Timer.h"

#include "Riostream.h"
#include "TMath.h"
#include "TMatrix.h"
#include "TMatrixD.h"
#include "TVectorD.h"

#include <iomanip>
#include <fstream>

using namespace TMVA;

namespace TMVA {
namespace Internal {
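// RAII guard for the Python GIL: the constructor acquires the global
// interpreter lock and the destructor releases it, so every scope below that
// touches the Python C API holds the lock for exactly its own lifetime.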
class PyGILRAII {
   PyGILState_STATE m_GILState;

public:
   PyGILRAII() : m_GILState(PyGILState_Ensure()) {}
   ~PyGILRAII() { PyGILState_Release(m_GILState); }
};
} // namespace Internal
} // namespace TMVA

REGISTER_METHOD(PyRandomForest)

ClassImp(MethodPyRandomForest);

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(const TString &jobName,
                                           const TString &methodTitle,
                                           DataSetInfo &dsi,
                                           const TString &theOption) :
   PyMethodBase(jobName, Types::kPyRandomForest, methodTitle, dsi, theOption),
   fNestimators(10),
   fCriterion("gini"),
   fMaxDepth("None"),
   fMinSamplesSplit(2),
   fMinSamplesLeaf(1),
   fMinWeightFractionLeaf(0),
   fMaxFeatures("'auto'"),
   fMaxLeafNodes("None"),
   fBootstrap(kTRUE),
   fOobScore(kFALSE),
   fNjobs(1),
   fRandomState("None"),
   fVerbose(0),
   fWarmStart(kFALSE),
   fClassWeight("None")
{
}

//_______________________________________________________________________
MethodPyRandomForest::MethodPyRandomForest(DataSetInfo &theData, const TString &theWeightFile)
   : PyMethodBase(Types::kPyRandomForest, theData, theWeightFile),
     fNestimators(10),
     fCriterion("gini"),
     fMaxDepth("None"),
     fMinSamplesSplit(2),
     fMinSamplesLeaf(1),
     fMinWeightFractionLeaf(0),
     fMaxFeatures("'auto'"),
     fMaxLeafNodes("None"),
     fBootstrap(kTRUE),
     fOobScore(kFALSE),
     fNjobs(1),
     fRandomState("None"),
     fVerbose(0),
     fWarmStart(kFALSE),
     fClassWeight("None")
{
}


//_______________________________________________________________________
MethodPyRandomForest::~MethodPyRandomForest(void)
{
}

//_______________________________________________________________________
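// Binary classification (two classes) and multiclass classification with two
// or more classes are supported; regression is not implemented by this method.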
Bool_t MethodPyRandomForest::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t)
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass && numberClasses >= 2) return kTRUE;
   return kFALSE;
}

//_______________________________________________________________________
void MethodPyRandomForest::DeclareOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef(fNestimators, "NEstimators", "Integer, optional (default=10). The number of trees in the forest.");
   DeclareOptionRef(fCriterion, "Criterion", "String, optional (default='gini') \
      The function to measure the quality of a split. Supported criteria are \
      'gini' for the Gini impurity and 'entropy' for the information gain. \
      Note: this parameter is tree-specific.");

   DeclareOptionRef(fMaxDepth, "MaxDepth", "integer or None, optional (default=None) \
      The maximum depth of the tree. If None, then nodes are expanded until \
      all leaves are pure or until all leaves contain less than \
      min_samples_split samples. \
      Ignored if ``max_leaf_nodes`` is not None.");

   DeclareOptionRef(fMinSamplesSplit, "MinSamplesSplit", "integer, optional (default=2) \
      The minimum number of samples required to split an internal node.");

   DeclareOptionRef(fMinSamplesLeaf, "MinSamplesLeaf", "integer, optional (default=1) \
      The minimum number of samples in newly created leaves. A split is \
      discarded if, after the split, one of the leaves would contain less than \
      ``min_samples_leaf`` samples.");

   DeclareOptionRef(fMinWeightFractionLeaf, "MinWeightFractionLeaf", "float, optional (default=0.) \
      The minimum weighted fraction of the input samples required to be at a \
      leaf node.");

   DeclareOptionRef(fMaxFeatures, "MaxFeatures", "The number of features to consider when looking for the best split");

   DeclareOptionRef(fMaxLeafNodes, "MaxLeafNodes", "int or None, optional (default=None) \
      Grow trees with ``max_leaf_nodes`` in best-first fashion. \
      Best nodes are defined as relative reduction in impurity. \
      If None then unlimited number of leaf nodes. \
      If not None then ``max_depth`` will be ignored.");

   DeclareOptionRef(fBootstrap, "Bootstrap", "boolean, optional (default=True) \
      Whether bootstrap samples are used when building trees.");

   DeclareOptionRef(fOobScore, "OoBScore", "bool. Whether to use out-of-bag samples to estimate \
      the generalization error.");

   DeclareOptionRef(fNjobs, "NJobs", "integer, optional (default=1) \
      The number of jobs to run in parallel for both `fit` and `predict`. \
      If -1, then the number of jobs is set to the number of cores.");

   DeclareOptionRef(fRandomState, "RandomState", "int, RandomState instance or None, optional (default=None) \
      If int, random_state is the seed used by the random number generator; \
      If RandomState instance, random_state is the random number generator; \
      If None, the random number generator is the RandomState instance used \
      by `np.random`.");

   DeclareOptionRef(fVerbose, "Verbose", "int, optional (default=0) \
      Controls the verbosity of the tree building process.");

   DeclareOptionRef(fWarmStart, "WarmStart", "bool, optional (default=False) \
      When set to ``True``, reuse the solution of the previous call to fit \
      and add more estimators to the ensemble, otherwise, just fit a whole \
      new forest.");

   DeclareOptionRef(fClassWeight, "ClassWeight", "dict, list of dicts, \"auto\", \"subsample\" or None, optional \
      Weights associated with classes in the form ``{class_label: weight}``. \
      If not given, all classes are supposed to have weight one. For \
      multi-output problems, a list of dicts can be provided in the same \
      order as the columns of y. \
      The \"auto\" mode uses the values of y to automatically adjust \
      weights inversely proportional to class frequencies in the input data. \
      The \"subsample\" mode is the same as \"auto\" except that weights are \
      computed based on the bootstrap sample for every tree grown. \
      For multi-output, the weights of each column of y will be multiplied. \
      Note that these weights will be multiplied with sample_weight (passed \
      through the fit method) if sample_weight is specified.");

   DeclareOptionRef(fFilenameClassifier, "FilenameClassifier",
                    "Store trained classifier in this file");
}

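// Example booking (illustrative sketch, not part of this file): assuming a
// TMVA::Factory `factory` and a TMVA::DataLoader `dataloader` set up as in the
// standard TMVA tutorials, the options declared above are passed as a single
// colon-separated string:
//
//    factory->BookMethod(dataloader, TMVA::Types::kPyRandomForest,
//                        "PyRandomForest",
//                        "NEstimators=100:Criterion=gini:MaxDepth=3:Bootstrap=True");
//
// Each Name=Value token corresponds to one DeclareOptionRef above.
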
//_______________________________________________________________________
// Check options and load them to local python namespace
void MethodPyRandomForest::ProcessOptions()
{
   if (fNestimators <= 0) {
      Log() << kFATAL << " NEstimators <=0... that does not work !! " << Endl;
   }
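   // Each option is converted to a Python object with Eval(...) and published
   // in the local Python namespace (fLocalNS) under a camelCase key; the
   // classifier construction string in Train() refers to these names.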
   pNestimators = Eval(Form("%i", fNestimators));
   PyDict_SetItemString(fLocalNS, "nEstimators", pNestimators);

   if (fCriterion != "gini" && fCriterion != "entropy") {
      Log() << kFATAL << Form(" Criterion = %s... that does not work !! ", fCriterion.Data())
            << " The options are `gini` or `entropy`." << Endl;
   }
   pCriterion = Eval(Form("'%s'", fCriterion.Data()));
   PyDict_SetItemString(fLocalNS, "criterion", pCriterion);

   pMaxDepth = Eval(fMaxDepth);
   PyDict_SetItemString(fLocalNS, "maxDepth", pMaxDepth);
   if (!pMaxDepth) {
      Log() << kFATAL << Form(" MaxDepth = %s... that does not work !! ", fMaxDepth.Data())
            << " The options are None or integer." << Endl;
   }

   if (fMinSamplesSplit < 0) {
      Log() << kFATAL << " MinSamplesSplit < 0... that does not work !! " << Endl;
   }
   pMinSamplesSplit = Eval(Form("%i", fMinSamplesSplit));
   PyDict_SetItemString(fLocalNS, "minSamplesSplit", pMinSamplesSplit);

   if (fMinSamplesLeaf < 0) {
      Log() << kFATAL << " MinSamplesLeaf < 0... that does not work !! " << Endl;
   }
   pMinSamplesLeaf = Eval(Form("%i", fMinSamplesLeaf));
   PyDict_SetItemString(fLocalNS, "minSamplesLeaf", pMinSamplesLeaf);

   if (fMinWeightFractionLeaf < 0) {
      Log() << kERROR << " MinWeightFractionLeaf < 0... that does not work !! " << Endl;
   }
   pMinWeightFractionLeaf = Eval(Form("%f", fMinWeightFractionLeaf));
   PyDict_SetItemString(fLocalNS, "minWeightFractionLeaf", pMinWeightFractionLeaf);

   if (fMaxFeatures == "auto" || fMaxFeatures == "sqrt" || fMaxFeatures == "log2") {
      fMaxFeatures = Form("'%s'", fMaxFeatures.Data());
   }
   pMaxFeatures = Eval(fMaxFeatures);
   PyDict_SetItemString(fLocalNS, "maxFeatures", pMaxFeatures);

   if (!pMaxFeatures) {
      Log() << kFATAL << Form(" MaxFeatures = %s... that does not work !! ", fMaxFeatures.Data())
            << "int, float, string or None, optional (default='auto')"
            << "The number of features to consider when looking for the best split:"
            << "If int, then consider `max_features` features at each split."
            << "If float, then `max_features` is a percentage and"
            << "`int(max_features * n_features)` features are considered at each split."
            << "If 'auto', then `max_features=sqrt(n_features)`."
            << "If 'sqrt', then `max_features=sqrt(n_features)`."
            << "If 'log2', then `max_features=log2(n_features)`."
            << "If None, then `max_features=n_features`." << Endl;
   }

   pMaxLeafNodes = Eval(fMaxLeafNodes);
   if (!pMaxLeafNodes) {
      Log() << kFATAL << Form(" MaxLeafNodes = %s... that does not work !! ", fMaxLeafNodes.Data())
            << " The options are None or integer." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "maxLeafNodes", pMaxLeafNodes);

   pRandomState = Eval(fRandomState);
   if (!pRandomState) {
      Log() << kFATAL << Form(" RandomState = %s... that does not work !! ", fRandomState.Data())
            << "If int, random_state is the seed used by the random number generator;"
            << "If RandomState instance, random_state is the random number generator;"
            << "If None, the random number generator is the RandomState instance used by `np.random`." << Endl;
   }
   PyDict_SetItemString(fLocalNS, "randomState", pRandomState);

   pClassWeight = Eval(fClassWeight);
   if (!pClassWeight) {
      Log() << kFATAL << Form(" ClassWeight = %s... that does not work !! ", fClassWeight.Data())
            << "dict, list of dicts, 'auto', 'subsample' or None, optional" << Endl;
   }
   PyDict_SetItemString(fLocalNS, "classWeight", pClassWeight);

   if (fNjobs < 1) {
      Log() << kFATAL << Form(" NJobs = %i... that does not work !! ", fNjobs)
            << "Value has to be greater than zero." << Endl;
   }
   pNjobs = Eval(Form("%i", fNjobs));
   PyDict_SetItemString(fLocalNS, "nJobs", pNjobs);

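   // The boolean flags are evaluated as the Python integers 0/1 here; the
   // construction string in Train() passes them on to scikit-learn.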
   pBootstrap = Eval(Form("%i", UInt_t(fBootstrap)));
   PyDict_SetItemString(fLocalNS, "bootstrap", pBootstrap);
   pOobScore = Eval(Form("%i", UInt_t(fOobScore)));
   PyDict_SetItemString(fLocalNS, "oobScore", pOobScore);
   pVerbose = Eval(Form("%i", fVerbose));
   PyDict_SetItemString(fLocalNS, "verbose", pVerbose);
   pWarmStart = Eval(Form("%i", UInt_t(fWarmStart)));
   PyDict_SetItemString(fLocalNS, "warmStart", pWarmStart);

   // If no filename is given, set default
   if (fFilenameClassifier.IsNull()) {
      fFilenameClassifier = GetWeightFileDir() + "/PyRFModel_" + GetName() + ".PyData";
   }
}

//_______________________________________________________________________
void MethodPyRandomForest::Init()
{
   TMVA::Internal::PyGILRAII raii;
   _import_array(); // required to use numpy arrays

   // Check options and load them to local python namespace
   ProcessOptions();

   // Import module for random forest classifier
   PyRunString("import sklearn.ensemble");
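   // Note: PyRunString reports a fatal log message if the Python code fails,
   // so a missing scikit-learn installation is caught already at this point.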

   // Get data properties
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
void MethodPyRandomForest::Train()
{
   // Load training data (data, classes, weights) to python arrays
   int fNrowsTraining = Data()->GetNTrainingEvents(); // every row is an event, a class type and a weight
   npy_intp dimsData[2];
   dimsData[0] = fNrowsTraining;
   dimsData[1] = fNvars;
   PyArrayObject *fTrainData = (PyArrayObject *)PyArray_SimpleNew(2, dimsData, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainData", (PyObject *)fTrainData);
   float *TrainData = (float *)(PyArray_DATA(fTrainData));

   npy_intp dimsClasses = (npy_intp)fNrowsTraining;
   PyArrayObject *fTrainDataClasses = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataClasses", (PyObject *)fTrainDataClasses);
   float *TrainDataClasses = (float *)(PyArray_DATA(fTrainDataClasses));

   PyArrayObject *fTrainDataWeights = (PyArrayObject *)PyArray_SimpleNew(1, &dimsClasses, NPY_FLOAT);
   PyDict_SetItemString(fLocalNS, "trainDataWeights", (PyObject *)fTrainDataWeights);
   float *TrainDataWeights = (float *)(PyArray_DATA(fTrainDataWeights));

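   // PyArray_SimpleNew allocates a C-contiguous (row-major) array, so the
   // value of variable j for event i is stored at TrainData[i * fNvars + j].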
   for (int i = 0; i < fNrowsTraining; i++) {
      // Fill training data matrix
      const TMVA::Event *e = Data()->GetTrainingEvent(i);
      for (UInt_t j = 0; j < fNvars; j++) {
         TrainData[j + i * fNvars] = e->GetValue(j);
      }

      // Fill target classes
      TrainDataClasses[i] = e->GetClass();

      // Get event weight
      TrainDataWeights[i] = e->GetWeight();
   }


   // Create classifier object
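   // The keyword values (bootstrap, classWeight, criterion, ...) are looked up
   // in fLocalNS, where ProcessOptions() stored them.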
   PyRunString("classifier = sklearn.ensemble.RandomForestClassifier(bootstrap=bootstrap, class_weight=classWeight, criterion=criterion, max_depth=maxDepth, max_features=maxFeatures, max_leaf_nodes=maxLeafNodes, min_samples_leaf=minSamplesLeaf, min_samples_split=minSamplesSplit, min_weight_fraction_leaf=minWeightFractionLeaf, n_estimators=nEstimators, n_jobs=nJobs, oob_score=oobScore, random_state=randomState, verbose=verbose, warm_start=warmStart)",
               "Failed to setup classifier");

   // Fit classifier
   // NOTE: We dump the output to a variable so that the call does not pollute stdout
   PyRunString("dump = classifier.fit(trainData, trainDataClasses, trainDataWeights)", "Failed to train classifier");

   // Store classifier
   fClassifier = PyDict_GetItemString(fLocalNS, "classifier");
   if (fClassifier == 0) {
      Log() << kFATAL << "Can't create classifier object from RandomForestClassifier" << Endl;
      Log() << Endl;
   }

   if (IsModelPersistence()) {
      Log() << Endl;
      Log() << gTools().Color("bold") << "Saving state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
      Log() << Endl;
      Serialize(fFilenameClassifier, fClassifier);
   }
}

//_______________________________________________________________________
void MethodPyRandomForest::TestClassification()
{
   MethodBase::TestClassification();
}

//_______________________________________________________________________
std::vector<Double_t> MethodPyRandomForest::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Determine number of events
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   nEvents = lastEvt - firstEvt;

   // use timer
   Timer timer(nEvents, GetName(), kTRUE);

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ", DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   // Get data
   npy_intp dims[2];
   dims[0] = nEvents;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));

   for (Int_t ievt = 0; ievt < nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      const TMVA::Event *e = Data()->GetEvent();
      for (UInt_t i = 0; i < fNvars; i++) {
         pValue[ievt * fNvars + i] = e->GetValue(i);
      }
   }

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));
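   // predict_proba returns an (nEvents x nClasses) row-major array, so the
   // class probabilities of event i start at proba[i * fNoutputs].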

   // Return signal probabilities
   if (Long64_t(mvaValues.size()) != nEvents) mvaValues.resize(nEvents);
   for (int i = 0; i < nEvents; ++i) {
      mvaValues[i] = proba[fNoutputs * i + TMVA::Types::kSignal];
   }

   Py_DECREF(pEvent);
   Py_DECREF(result);

   if (logProgress) {
      Log() << kINFO
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return mvaValues;
}

//_______________________________________________________________________
Double_t MethodPyRandomForest::GetMvaValue(Double_t *errLower, Double_t *errUpper)
{
   // cannot determine error
   NoErrorCalc(errLower, errUpper);

   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA value: the signal probability of the single event
   Double_t mvaValue = proba[TMVA::Types::kSignal];

   Py_DECREF(result);
   Py_DECREF(pEvent);

   return mvaValue;
}

//_______________________________________________________________________
std::vector<Float_t>& MethodPyRandomForest::GetMulticlassValues()
{
   // Load model if not already done
   if (fClassifier == 0) ReadModelFromFile();

   // Get current event and load to python array
   const TMVA::Event *e = Data()->GetEvent();
   npy_intp dims[2];
   dims[0] = 1;
   dims[1] = fNvars;
   PyArrayObject *pEvent = (PyArrayObject *)PyArray_SimpleNew(2, dims, NPY_FLOAT);
   float *pValue = (float *)(PyArray_DATA(pEvent));
   for (UInt_t i = 0; i < fNvars; i++) pValue[i] = e->GetValue(i);

   // Get prediction from classifier
   PyArrayObject *result = (PyArrayObject *)PyObject_CallMethod(fClassifier, const_cast<char *>("predict_proba"), const_cast<char *>("(O)"), pEvent);
   double *proba = (double *)(PyArray_DATA(result));

   // Return MVA values: one probability per class
   if (UInt_t(classValues.size()) != fNoutputs) classValues.resize(fNoutputs);
   for (UInt_t i = 0; i < fNoutputs; i++) classValues[i] = proba[i];

   Py_DECREF(pEvent);
   Py_DECREF(result);

   return classValues;
}

//_______________________________________________________________________
void MethodPyRandomForest::ReadModelFromFile()
{
   if (!PyIsInitialized()) {
      PyInitialize();
   }

   Log() << Endl;
   Log() << gTools().Color("bold") << "Loading state file: " << gTools().Color("reset") << fFilenameClassifier << Endl;
   Log() << Endl;

   // Load classifier from file
   Int_t err = UnSerialize(fFilenameClassifier, &fClassifier);
   if (err != 0) {
      Log() << kFATAL << Form("Failed to load classifier from file (error code: %i): %s", err, fFilenameClassifier.Data()) << Endl;
   }

   // Book classifier object in python dict
   PyDict_SetItemString(fLocalNS, "classifier", fClassifier);

   // Load data properties
   // NOTE: This has to be repeated here for the reader application
   fNvars = GetNVariables();
   fNoutputs = DataInfo().GetNClasses();
}

//_______________________________________________________________________
const Ranking* MethodPyRandomForest::CreateRanking()
{
   // Get the feature importances from the classifier as an array whose length
   // equals the number of variables; a higher value signals higher importance
   PyArrayObject* pRanking = (PyArrayObject*) PyObject_GetAttrString(fClassifier, "feature_importances_");
   if (pRanking == 0) Log() << kFATAL << "Failed to get ranking from classifier" << Endl;
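   // feature_importances_ holds one impurity-based importance per input
   // variable; scikit-learn normalizes these values to sum to one.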

   // Fill ranking object and return it
   fRanking = new Ranking(GetName(), "Variable Importance");
   Double_t* rankingData = (Double_t*) PyArray_DATA(pRanking);
   for (UInt_t iVar = 0; iVar < fNvars; iVar++) {
      fRanking->AddRank(Rank(GetInputLabel(iVar), rankingData[iVar]));
   }

   Py_DECREF(pRanking);

   return fRanking;
}

//_______________________________________________________________________
void MethodPyRandomForest::GetHelpMessage() const
{
   // typical length of text line:
   // "|--------------------------------------------------------------|"
   Log() << "A random forest is a meta estimator that fits a number of decision" << Endl;
   Log() << "tree classifiers on various sub-samples of the dataset and uses" << Endl;
   Log() << "averaging to improve the predictive accuracy and control over-fitting." << Endl;
   Log() << Endl;
   Log() << "Check out the scikit-learn documentation for more information." << Endl;
}