MethodBase.cxx
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : MethodBase                                                            *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      Implementation (see header for description)                               *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker   <Andreas.Hocker@cern.ch>   - CERN, Switzerland          *
 *      Joerg Stelzer     <Joerg.Stelzer@cern.ch>    - CERN, Switzerland          *
 *      Peter Speckmayer  <Peter.Speckmayer@cern.ch> - CERN, Switzerland          *
 *      Helge Voss        <Helge.Voss@cern.ch>       - MPI-K Heidelberg, Germany  *
 *      Kai Voss          <Kai.Voss@cern.ch>         - U. of Victoria, Canada     *
 *      Jan Therhaag      <Jan.Therhaag@cern.ch>     - U. of Bonn, Germany        *
 *      Eckhard v. Toerne <evt@uni-bonn.de>          - U. of Bonn, Germany        *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

/*! \class TMVA::MethodBase
\ingroup TMVA

Virtual base class for all MVA methods.

MethodBase hosts several method-independent evaluation tools.

The kind of MVA that provides optimal performance in an analysis strongly
depends on the particular application. The evaluation factory provides a
number of numerical benchmark results to directly assess the performance
of the MVA training on the independent test sample. These are:

 - The _signal efficiency_ at three representative background efficiencies
   (background efficiency is 1 &minus; rejection).
 - The _significance_ of an MVA estimator, defined by the difference
   between the MVA mean values for signal and background, divided by the
   quadratic sum of their root mean squares.
 - The _separation_ of an MVA _x_, defined by the integral
   \f[
   \frac{1}{2} \int \frac{(S(x) - B(x))^2}{S(x) + B(x)} dx
   \f]
   where
   \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
   respectively. The separation is zero for identical signal and background MVA
   shapes, and it is one for disjoint (non-overlapping) shapes.
 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
   The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
   a uniform background distribution. In this way, the signal distributions
   \f$ S(x) \f$ can be directly compared among the various MVAs. The more strongly
   \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
   The \f$ \mu_{transform} \f$ is
   [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).

The MVA standard output also prints the linear correlation coefficients between
signal and background, which can be useful to eliminate variables that exhibit too
strong correlations.
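
For illustration, the separation integral can be approximated from two binned
response histograms that share their binning and are normalised to unit area.
The following is a minimal sketch written for this documentation (a free-standing
helper, not a member of this class; TMVA provides equivalent functionality via
TMVA::Tools::GetSeparation):

~~~ {.cpp}
// Approximate 1/2 * Int (S-B)^2/(S+B) dx from unit-normalised histograms
// with identical binning. The bin width cancels in the ratio.
double Separation(const TH1& s, const TH1& b)
{
   double sep = 0;
   for (int i = 1; i <= s.GetNbinsX(); ++i) {
      const double si = s.GetBinContent(i);
      const double bi = b.GetBinContent(i);
      if (si + bi > 0) sep += 0.5 * (si - bi) * (si - bi) / (si + bi);
   }
   return sep; // 0 for identical shapes, 1 for disjoint shapes
}
~~~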
*/

#include "TMVA/MethodBase.h"

#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/DataSet.h"
#include "TMVA/Factory.h"
#include "TMVA/IMethod.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/PDF.h"
#include "TMVA/Ranking.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Tools.h"
#include "TMVA/Results.h"
#include "TMVA/ResultsClassification.h" // required for the casts in AddClassifierOutput below
#include "TMVA/ResultsRegression.h"
#include "TMVA/ResultsMulticlass.h"
#include "TMVA/RootFinder.h"
#include "TMVA/Timer.h"
#include "TMVA/TSpline1.h"
#include "TMVA/Types.h"
#include "TMVA/VariableInfo.h"
#include "TMVA/VariableTransform.h"
#include "TMVA/Version.h"

#include "TROOT.h"
#include "TSystem.h"
#include "TObjString.h"
#include "TQObject.h"
#include "TSpline.h"
#include "TMatrix.h"
#include "TMath.h"
#include "TH1F.h"
#include "TH2F.h"
#include "TFile.h"
#include "TKey.h"
#include "TGraph.h"
#include "Riostream.h"
#include "TXMLEngine.h"

#include <iomanip>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <cfloat> // DBL_MAX, used in InitBase
#include <algorithm>
#include <limits>


ClassImp(TMVA::MethodBase);

using std::endl;
using std::atof;

//const Int_t MethodBase_MaxIterations_ = 200;
const Bool_t Use_Splines_for_Eff_ = kTRUE;

//const Int_t NBIN_HIST_PLOT = 100;
const Int_t NBIN_HIST_HIGH = 10000;

#ifdef _WIN32
/* Disable warning C4355: 'this' : used in base member initializer list */
#pragma warning ( disable : 4355 )
#endif

#include "TMultiGraph.h"

////////////////////////////////////////////////////////////////////////////////
/// standard constructor

TMVA::IPythonInteractive::IPythonInteractive() : fMultiGraph(new TMultiGraph())
{
   fNumGraphs = 0;
   fIndex = 0;
}

////////////////////////////////////////////////////////////////////////////////
/// standard destructor

TMVA::IPythonInteractive::~IPythonInteractive()
{
   if (fMultiGraph) {
      delete fMultiGraph;
      fMultiGraph = nullptr;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// This function takes a vector of titles and creates a TGraph for each title.
/// It also sets up the style of every TGraph. All graphs are added to a single
/// TMultiGraph. A usage sketch is given after AddPoint below.
///
/// \param[in] graphTitles vector of titles

void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
{
   if (fNumGraphs != 0) {
      std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
      return;
   }
   Int_t color = 2;
   for (auto& title : graphTitles) {
      fGraphs.push_back( new TGraph() );
      fGraphs.back()->SetTitle(title);
      fGraphs.back()->SetName(title);
      fGraphs.back()->SetFillColor(color);
      fGraphs.back()->SetLineColor(color);
      fGraphs.back()->SetMarkerColor(color);
      fMultiGraph->Add(fGraphs.back());
      color += 2;
      fNumGraphs += 1;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// This function sets the point number to 0 for all graphs.

void TMVA::IPythonInteractive::ClearGraphs()
{
   for (Int_t i = 0; i < fNumGraphs; i++) {
      fGraphs[i]->Set(0);
   }
}

////////////////////////////////////////////////////////////////////////////////
/// This function is used only in the case of two TGraphs; it adds a new data
/// point to each of them.
///
/// \param[in] x  the x coordinate
/// \param[in] y1 the y coordinate for the first TGraph
/// \param[in] y2 the y coordinate for the second TGraph

void TMVA::IPythonInteractive::AddPoint(Double_t x, Double_t y1, Double_t y2)
{
   fGraphs[0]->Set(fIndex+1);
   fGraphs[1]->Set(fIndex+1);
   fGraphs[0]->SetPoint(fIndex, x, y1);
   fGraphs[1]->SetPoint(fIndex, x, y2);
   fIndex++;
}

////////////////////////////////////////////////////////////////////////////////
/// This function can add data points to as many TGraphs as were created.
///
/// \param[in] dat vector of data points: dat[0] contains the x coordinate,
///            dat[1] the y coordinate for the first TGraph, dat[2] for the
///            second, and so on.

void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
{
   for (Int_t i = 0; i < fNumGraphs; i++) {
      fGraphs[i]->Set(fIndex+1);
      fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
   }
   fIndex++;
}
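
// Usage sketch for IPythonInteractive (illustrative; assumes it is driven from
// a per-epoch training callback, and that the TMultiGraph accessor is the Get()
// method declared in the header -- check MethodBase.h for the exact name):
//
//    TMVA::IPythonInteractive ipy;
//    std::vector<TString> titles = { "training error", "test error" };
//    ipy.Init(titles);                              // one TGraph per title
//    for (int epoch = 0; epoch < nEpochs; ++epoch)
//       ipy.AddPoint(epoch, trainErr, testErr);     // two-graph convenience overload
//    // ipy.Get()->Draw("AL");                      // draw all graphs at once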

////////////////////////////////////////////////////////////////////////////////
/// standard constructor

TMVA::MethodBase::MethodBase( const TString& jobName,
                              Types::EMVA methodType,
                              const TString& methodTitle,
                              DataSetInfo& dsi,
                              const TString& theOption) :
   IMethod(),
   Configurable               ( theOption ),
   fTmpEvent                  ( 0 ),
   fRanking                   ( 0 ),
   fInputVars                 ( 0 ),
   fAnalysisType              ( Types::kNoAnalysisType ),
   fRegressionReturnVal       ( 0 ),
   fMulticlassReturnVal       ( 0 ),
   fDataSetInfo               ( dsi ),
   fSignalReferenceCut        ( 0.5 ),
   fSignalReferenceCutOrientation( 1. ),
   fVariableTransformType     ( Types::kSignal ),
   fJobName                   ( jobName ),
   fMethodName                ( methodTitle ),
   fMethodType                ( methodType ),
   fTestvar                   ( "" ),
   fTMVATrainingVersion       ( TMVA_VERSION_CODE ),
   fROOTTrainingVersion       ( ROOT_VERSION_CODE ),
   fConstructedFromWeightFile ( kFALSE ),
   fBaseDir                   ( 0 ),
   fMethodBaseDir             ( 0 ),
   fFile                      ( 0 ),
   fSilentFile                ( kFALSE ),
   fModelPersistence          ( kTRUE ),
   fWeightFile                ( "" ),
   fEffS                      ( 0 ),
   fDefaultPDF                ( 0 ),
   fMVAPdfS                   ( 0 ),
   fMVAPdfB                   ( 0 ),
   fSplS                      ( 0 ),
   fSplB                      ( 0 ),
   fSpleffBvsS                ( 0 ),
   fSplTrainS                 ( 0 ),
   fSplTrainB                 ( 0 ),
   fSplTrainEffBvsS           ( 0 ),
   fVarTransformString        ( "None" ),
   fTransformationPointer     ( 0 ),
   fTransformation            ( dsi, methodTitle ),
   fVerbose                   ( kFALSE ),
   fVerbosityLevelString      ( "Default" ),
   fHelp                      ( kFALSE ),
   fHasMVAPdfs                ( kFALSE ),
   fIgnoreNegWeightsInTraining( kFALSE ),
   fSignalClass               ( 0 ),
   fBackgroundClass           ( 0 ),
   fSplRefS                   ( 0 ),
   fSplRefB                   ( 0 ),
   fSplTrainRefS              ( 0 ),
   fSplTrainRefB              ( 0 ),
   fSetupCompleted            ( kFALSE )
{
   SetTestvarName();
   fLogger->SetSource(GetName());
}

////////////////////////////////////////////////////////////////////////////////
/// constructor used for testing and application of the MVA only (no training),
/// using the given weight file

TMVA::MethodBase::MethodBase( Types::EMVA methodType,
                              DataSetInfo& dsi,
                              const TString& weightFile ) :
   IMethod(),
   Configurable(""),
   fTmpEvent                  ( 0 ),
   fRanking                   ( 0 ),
   fInputVars                 ( 0 ),
   fAnalysisType              ( Types::kNoAnalysisType ),
   fRegressionReturnVal       ( 0 ),
   fMulticlassReturnVal       ( 0 ),
   fDataSetInfo               ( dsi ),
   fSignalReferenceCut        ( 0.5 ),
   fVariableTransformType     ( Types::kSignal ),
   fJobName                   ( "" ),
   fMethodName                ( "MethodBase" ),
   fMethodType                ( methodType ),
   fTestvar                   ( "" ),
   fTMVATrainingVersion       ( 0 ),
   fROOTTrainingVersion       ( 0 ),
   fConstructedFromWeightFile ( kTRUE ),
   fBaseDir                   ( 0 ),
   fMethodBaseDir             ( 0 ),
   fFile                      ( 0 ),
   fSilentFile                ( kFALSE ),
   fModelPersistence          ( kTRUE ),
   fWeightFile                ( weightFile ),
   fEffS                      ( 0 ),
   fDefaultPDF                ( 0 ),
   fMVAPdfS                   ( 0 ),
   fMVAPdfB                   ( 0 ),
   fSplS                      ( 0 ),
   fSplB                      ( 0 ),
   fSpleffBvsS                ( 0 ),
   fSplTrainS                 ( 0 ),
   fSplTrainB                 ( 0 ),
   fSplTrainEffBvsS           ( 0 ),
   fVarTransformString        ( "None" ),
   fTransformationPointer     ( 0 ),
   fTransformation            ( dsi, "" ),
   fVerbose                   ( kFALSE ),
   fVerbosityLevelString      ( "Default" ),
   fHelp                      ( kFALSE ),
   fHasMVAPdfs                ( kFALSE ),
   fIgnoreNegWeightsInTraining( kFALSE ),
   fSignalClass               ( 0 ),
   fBackgroundClass           ( 0 ),
   fSplRefS                   ( 0 ),
   fSplRefB                   ( 0 ),
   fSplTrainRefS              ( 0 ),
   fSplTrainRefB              ( 0 ),
   fSetupCompleted            ( kFALSE )
{
   fLogger->SetSource(GetName());
}

////////////////////////////////////////////////////////////////////////////////
/// destructor

TMVA::MethodBase::~MethodBase( void )
{
   if (!fSetupCompleted) Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "Calling destructor of method that was never set up" << Endl;

   if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
   if (fRanking   != 0) delete fRanking;

   // PDFs
   if (fDefaultPDF != 0) { delete fDefaultPDF; fDefaultPDF = 0; }
   if (fMVAPdfS    != 0) { delete fMVAPdfS;    fMVAPdfS    = 0; }
   if (fMVAPdfB    != 0) { delete fMVAPdfB;    fMVAPdfB    = 0; }

   // Splines
   if (fSplS)            { delete fSplS;            fSplS            = 0; }
   if (fSplB)            { delete fSplB;            fSplB            = 0; }
   if (fSpleffBvsS)      { delete fSpleffBvsS;      fSpleffBvsS      = 0; }
   if (fSplRefS)         { delete fSplRefS;         fSplRefS         = 0; }
   if (fSplRefB)         { delete fSplRefB;         fSplRefB         = 0; }
   if (fSplTrainRefS)    { delete fSplTrainRefS;    fSplTrainRefS    = 0; }
   if (fSplTrainRefB)    { delete fSplTrainRefB;    fSplTrainRefB    = 0; }
   if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }

   for (Int_t i = 0; i < 2; i++) {
      if (fEventCollections.at(i)) {
         for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
              it != fEventCollections.at(i)->end(); ++it) {
            delete (*it);
         }
         delete fEventCollections.at(i);
         fEventCollections.at(i) = 0;
      }
   }

   if (fRegressionReturnVal) delete fRegressionReturnVal;
   if (fMulticlassReturnVal) delete fMulticlassReturnVal;
}

////////////////////////////////////////////////////////////////////////////////
/// setup of methods

void TMVA::MethodBase::SetupMethod()
{
   if (fSetupCompleted) Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "Calling SetupMethod for the second time" << Endl;
   InitBase();
   DeclareBaseOptions();
   Init();
   DeclareOptions();
   fSetupCompleted = kTRUE;
}

////////////////////////////////////////////////////////////////////////////////
/// process all options;
/// "CheckForUnusedOptions" is done in an independent call, since it may be
/// overridden by a derived class (sometimes, e.g., fitters are used which can
/// only be implemented during the training phase)

void TMVA::MethodBase::ProcessSetup()
{
   ProcessBaseOptions();
   ProcessOptions();
}

////////////////////////////////////////////////////////////////////////////////
/// check may be overridden by a derived class
/// (sometimes, e.g., fitters are used which can only be implemented during the
/// training phase)

void TMVA::MethodBase::CheckSetup()
{
   CheckForUnusedOptions();
}
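
// The calls above are driven by the Factory. A sketch of the assumed lifecycle
// (simplified; ParseOptions() comes from the Configurable base class):
//
//    method->SetupMethod();    // InitBase/Init + option declarations
//    method->ParseOptions();   // decode the user's option string
//    method->ProcessSetup();   // act on the decoded options
//    method->CheckSetup();     // warn about unknown/unused options
//    method->TrainMethod();    // see below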

////////////////////////////////////////////////////////////////////////////////
/// default initialization called by all constructors

void TMVA::MethodBase::InitBase()
{
   SetConfigDescription( "Configuration options for classifier architecture and tuning" );

   fNbins          = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
   fNbinsH         = NBIN_HIST_HIGH;

   fSplTrainS       = 0;
   fSplTrainB       = 0;
   fSplTrainEffBvsS = 0;
   fMeanS           = -1;
   fMeanB           = -1;
   fRmsS            = -1;
   fRmsB            = -1;
   fXmin            = DBL_MAX;
   fXmax            = -DBL_MAX;
   fTxtWeightsOnly  = kTRUE;
   fSplRefS         = 0;
   fSplRefB         = 0;

   fTrainTime = -1.;
   fTestTime  = -1.;

   fRanking = 0;

   // temporary until the move to DataSet is complete
   fInputVars = new std::vector<TString>;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
   }
   fRegressionReturnVal = 0;
   fMulticlassReturnVal = 0;

   fEventCollections.resize( 2 );
   fEventCollections.at(0) = 0;
   fEventCollections.at(1) = 0;

   // retrieve signal and background class index
   if (DataInfo().GetClassInfo("Signal") != 0) {
      fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   if (DataInfo().GetClassInfo("Background") != 0) {
      fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
   }

   SetConfigDescription( "Configuration options for MVA method" );
   SetConfigName( TString("Method") + GetMethodTypeName() );
}

////////////////////////////////////////////////////////////////////////////////
/// define the options (their key words) that can be set in the option string.
/// Here the options valid for ALL MVA methods are declared.
///
/// Known options:
///
/// - VariableTransform=None,Decorrelated,PCA  use transformed variables
///   instead of the original ones
/// - VariableTransformType=Signal,Background  which decorrelation matrix to use
///   in the method; only the Likelihood method can make proper use of
///   independent transformations of signal and background
/// - fNbinsMVAPdf = 50    number of bins used to create the PDF of the MVA output
/// - fNsmoothMVAPdf = 2   number of times a histogram is smoothed before creating the PDF
/// - fHasMVAPdfs          create PDFs for the MVA outputs
/// - V                    verbose output (!V for non-verbose)
/// - H                    print the method-specific help message

void TMVA::MethodBase::DeclareBaseOptions()
{
   DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );

   DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
   AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
   AddPreDefVal( TString("Debug") );
   AddPreDefVal( TString("Verbose") );
   AddPreDefVal( TString("Info") );
   AddPreDefVal( TString("Warning") );
   AddPreDefVal( TString("Error") );
   AddPreDefVal( TString("Fatal") );

   // If True (default): write all training results (weights) as text files only;
   // if False: write also in ROOT format (not available for all methods - these abort if asked for it)
   fTxtWeightsOnly = kTRUE;  // OBSOLETE !!!
   fNormalise      = kFALSE; // OBSOLETE !!!

   DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );

   DeclareOptionRef( fHelp, "H", "Print method-specific help message" );

   DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );

   DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
                     "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
}
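
// Example of a booking string exercising these base options (illustrative
// values; the method-specific options of the booked type would be appended):
//
//    factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDT",
//                         "!H:VerbosityLevel=Info:VarTransform=D,G:CreateMVAPdfs" );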

////////////////////////////////////////////////////////////////////////////////
/// the option string is decoded; for available options see "DeclareOptions"

void TMVA::MethodBase::ProcessBaseOptions()
{
   if (HasMVAPdfs()) {
      // setting the default bin num... maybe should be static ? ==> Please no static (JS)
      // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
      // reading every PDF's definition and passing the option string to the next one to be read and marked
      fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
      fDefaultPDF->DeclareOptions();
      fDefaultPDF->ParseOptions();
      fDefaultPDF->ProcessOptions();
      fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
      fMVAPdfB->DeclareOptions();
      fMVAPdfB->ParseOptions();
      fMVAPdfB->ProcessOptions();
      fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
      fMVAPdfS->DeclareOptions();
      fMVAPdfS->ParseOptions();
      fMVAPdfS->ProcessOptions();

      // the final marked option string is written back to the original methodbase
      SetOptions( fMVAPdfS->GetOptions() );
   }

   TMVA::CreateVariableTransforms( fVarTransformString,
                                   DataInfo(),
                                   GetTransformationHandler(),
                                   Log() );

   if (!HasMVAPdfs()) {
      if (fDefaultPDF != 0) { delete fDefaultPDF; fDefaultPDF = 0; }
      if (fMVAPdfS    != 0) { delete fMVAPdfS;    fMVAPdfS    = 0; }
      if (fMVAPdfB    != 0) { delete fMVAPdfB;    fMVAPdfB    = 0; }
   }

   if (fVerbose) { // overwrites other settings
      fVerbosityLevelString = TString("Verbose");
      Log().SetMinType( kVERBOSE );
   }
   else if (fVerbosityLevelString == "Debug"   ) Log().SetMinType( kDEBUG );
   else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
   else if (fVerbosityLevelString == "Info"    ) Log().SetMinType( kINFO );
   else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
   else if (fVerbosityLevelString == "Error"   ) Log().SetMinType( kERROR );
   else if (fVerbosityLevelString == "Fatal"   ) Log().SetMinType( kFATAL );
   else if (fVerbosityLevelString != "Default" ) {
      Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
            << fVerbosityLevelString << "' unknown." << Endl;
   }
   Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
}

////////////////////////////////////////////////////////////////////////////////
/// options that are used ONLY for the READER to ensure backward compatibility.
/// They are hence without any effect (the reader only reads the training
/// options that HAD been used at the training of the .xml weight file at hand).

void TMVA::MethodBase::DeclareCompatibilityOptions()
{
   DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
   DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
   DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
                     "Use signal or background events to derive the variable transformation (the transformation is applied to both event types, of course)" );
   AddPreDefVal( TString("Signal") );
   AddPreDefVal( TString("Background") );
   DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
   // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear'? Not a good idea i think..
   // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
   // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
   // AddPreDefVal( TString("Debug") );
   // AddPreDefVal( TString("Verbose") );
   // AddPreDefVal( TString("Info") );
   // AddPreDefVal( TString("Warning") );
   // AddPreDefVal( TString("Error") );
   // AddPreDefVal( TString("Fatal") );
   DeclareOptionRef( fNbinsMVAPdf   = 60, "NbinsMVAPdf",   "Number of bins used for the PDFs of classifier outputs" );
   DeclareOptionRef( fNsmoothMVAPdf = 2,  "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
}

////////////////////////////////////////////////////////////////////////////////
/// call the Optimizer with the set of parameters and ranges that
/// are meant to be tuned.

std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
{
   // this is just a dummy... it needs to be implemented for each method
   // individually (as long as we don't have it automated via the
   // configuration string)

   Log() << kWARNING << Form("Dataset[%s] : ",DataInfo().GetName()) << "Parameter optimization is not yet implemented for method "
         << GetName() << Endl;
   Log() << kWARNING << Form("Dataset[%s] : ",DataInfo().GetName()) << "Currently we need to set hardcoded which parameter is tuned in which ranges" << Endl;

   std::map<TString,Double_t> tunedParameters;
   tunedParameters.size(); // just to get rid of "unused" warning
   return tunedParameters;
}

////////////////////////////////////////////////////////////////////////////////
/// set the tuning parameters according to the argument.
/// This is just a dummy; have a look at MethodBDT for how the same thing might
/// be implemented for the other classifiers.

void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
{
}
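
// Sketch of the intended use, as implemented e.g. in MethodBDT (illustrative;
// the figure-of-merit and fitter names below are examples, not an exhaustive list):
//
//    std::map<TString,Double_t> tuned = method->OptimizeTuningParameters("ROCIntegral", "FitGA");
//    method->SetTuneParameters(tuned);   // adopt the optimized values before training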

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBase::TrainMethod()
{
   Data()->SetCurrentType(Types::kTraining);
   Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so

   // train the MVA method
   if (Help()) PrintHelpMessage();

   // all histograms should be created in the method's subdirectory
   if (!IsSilentFile()) BaseDir()->cd();

   // calculate all the transformations needed for this classifier once
   // (e.g. the sequence Decorr:Gauss:Decorr)
   GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());

   // call training of derived MVA
   Log() << kDEBUG //<< Form("\tDataset[%s] : ",DataInfo().GetName())
         << "Begin training" << Endl;
   Long64_t nEvents = Data()->GetNEvents();
   Timer traintimer( nEvents, GetName(), kTRUE );
   Train();
   Log() << kDEBUG //<< Form("Dataset[%s] : ",DataInfo().GetName())
         << "\tEnd of training " << Endl;
   SetTrainTime(traintimer.ElapsedSeconds());
   Log() << kINFO //<< Form("Dataset[%s] : ",DataInfo().GetName())
         << "Elapsed time for training with " << nEvents << " events: "
         << traintimer.GetElapsedTime() << " " << Endl;

   Log() << kDEBUG //<< Form("Dataset[%s] : ",DataInfo().GetName())
         << "\tCreate MVA output for ";

   // create PDFs for the signal and background MVA distributions (if required)
   if (DoMulticlass()) {
      Log() << Form("[%s] : ",DataInfo().GetName()) << "Multiclass classification on training sample" << Endl;
      AddMulticlassOutput(Types::kTraining);
   }
   else if (!DoRegression()) {

      Log() << Form("[%s] : ",DataInfo().GetName()) << "classification on training sample" << Endl;
      AddClassifierOutput(Types::kTraining);
      if (HasMVAPdfs()) {
         CreateMVAPdfs();
         AddClassifierOutputProb(Types::kTraining);
      }

   } else {

      Log() << Form("Dataset[%s] : ",DataInfo().GetName()) << "regression on training sample" << Endl;
      AddRegressionOutput( Types::kTraining );

      if (HasMVAPdfs()) {
         Log() << Form("Dataset[%s] : ",DataInfo().GetName()) << "Create PDFs" << Endl;
         CreateMVAPdfs();
      }
   }

   // write the current MVA state to the weight file (XML)
   if (fModelPersistence) WriteStateToFile();

   // produce standalone make class (presently only supported for classification)
   if ((!DoRegression()) && (fModelPersistence)) MakeClass();

   // write additional monitoring histograms to main target file (not the weight file);
   // again, make sure the histograms go into the method's subdirectory
   if (!IsSilentFile()) {
      BaseDir()->cd();
      WriteMonitoringHistosToFile();
   }
}

////////////////////////////////////////////////////////////////////////////////
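/// Compute the deviation of the regression output from the target for target
/// number tgtNum: stddev is the square root of the mean squared deviation, and
/// stddev90Percent the same quantity truncated at the 90% quantile of the
/// squared-deviation distribution. Note that the results are taken from the
/// test-sample results independently of the type argument (which only affects
/// the log message).
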
void TMVA::MethodBase::GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev, Double_t& stddev90Percent ) const
{
   if (!DoRegression()) Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "Trying to use GetRegressionDeviation() with a classification job" << Endl;
   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
   ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
   bool truncate = false;
   TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
   stddev = sqrt(h1->GetMean());
   truncate = true;
   Double_t yq[1], xq[]={0.9};
   h1->GetQuantiles(1,yq,xq);
   TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
   stddev90Percent = sqrt(h2->GetMean());
   delete h1;
   delete h2;
}

////////////////////////////////////////////////////////////////////////////////
/// prepare tree branch with the method's discriminating variable

void TMVA::MethodBase::AddRegressionOutput(Types::ETreeType type)
{
   Data()->SetCurrentType(type);

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;

   ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);

   Long64_t nEvents = Data()->GetNEvents();

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );
   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;

   regRes->Resize( nEvents );

   // Drawing the progress bar every event was causing a huge slowdown in the evaluation time.
   // So we set some parameters to draw the progress bar a total of totalProgressDraws times,
   // i.e. only draw for every 1 in 100 events.

   Int_t totalProgressDraws = 100; // total number of times to update the progress bar
   Int_t drawProgressEvery = 1;    // draw every nth event such that we have a total of totalProgressDraws
   if (nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;

   for (Int_t ievt=0; ievt<nEvents; ievt++) {

      Data()->SetCurrentEvent(ievt);
      std::vector< Float_t > vals = GetRegressionValues();
      regRes->SetValue( vals, ievt );

      // Only draw the progress bar once in a while; doing this every event causes the evaluation to be ridiculously slow
      if (ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
   }

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName())
         << "Elapsed time for evaluation of " << nEvents << " events: "
         << timer.GetElapsedTime() << " " << Endl;

   // store time used for testing
   if (type==Types::kTesting)
      SetTestTime(timer.ElapsedSeconds());

   TString histNamePrefix(GetTestvarName());
   histNamePrefix += (type==Types::kTraining?"train":"test");
   regRes->CreateDeviationHistograms( histNamePrefix );
}

////////////////////////////////////////////////////////////////////////////////
/// prepare tree branch with the method's discriminating variable

void TMVA::MethodBase::AddMulticlassOutput(Types::ETreeType type)
{
   Data()->SetCurrentType(type);

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;

   ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
   if (!resMulticlass) Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "unable to create pointer in AddMulticlassOutput, exiting." << Endl;

   Long64_t nEvents = Data()->GetNEvents();

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Multiclass evaluation of " << GetMethodName() << " on "
         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;

   resMulticlass->Resize( nEvents );
   for (Int_t ievt=0; ievt<nEvents; ievt++) {
      Data()->SetCurrentEvent(ievt);
      std::vector< Float_t > vals = GetMulticlassValues();
      resMulticlass->SetValue( vals, ievt );
      timer.DrawProgressBar( ievt );
   }

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName())
         << "Elapsed time for evaluation of " << nEvents << " events: "
         << timer.GetElapsedTime() << " " << Endl;

   // store time used for testing
   if (type==Types::kTesting)
      SetTestTime(timer.ElapsedSeconds());

   TString histNamePrefix(GetTestvarName());
   histNamePrefix += (type==Types::kTraining?"_Train":"_Test");

   resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
   if (err) *err = -1;
   if (errUpper) *errUpper = -1;
}

////////////////////////////////////////////////////////////////////////////////

Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
   fTmpEvent = ev;
   Double_t val = GetMvaValue(err, errUpper);
   fTmpEvent = 0;
   return val;
}

////////////////////////////////////////////////////////////////////////////////
/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
/// for a quick determination if an event would be selected as signal or background

Bool_t TMVA::MethodBase::IsSignalLike() {
   return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
}

////////////////////////////////////////////////////////////////////////////////
/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
/// for a quick determination if an event with this mva output value would be selected as signal or background

Bool_t TMVA::MethodBase::IsSignalLike(Double_t mvaVal) {
   return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
}
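
// Multiplying both sides by the orientation flips the comparison where needed:
// with orientation +1 an event is signal-like for mvaVal > cut, with
// orientation -1 for mvaVal < cut. For example (illustrative values):
//
//    cut = 0.3, orientation = +1 :  mvaVal 0.7 -> signal-like, 0.1 -> background-like
//    cut = 0.3, orientation = -1 :  mvaVal 0.1 -> signal-like, 0.7 -> background-like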

////////////////////////////////////////////////////////////////////////////////
/// prepare tree branch with the method's discriminating variable

void TMVA::MethodBase::AddClassifierOutput( Types::ETreeType type )
{
   Data()->SetCurrentType(type);

   ResultsClassification* clRes =
      (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );

   Long64_t nEvents = Data()->GetNEvents();
   clRes->Resize( nEvents );

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );
   std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);

   // store time used for testing
   if (type==Types::kTesting)
      SetTestTime(timer.ElapsedSeconds());

   // load mva values to results object
   for (Int_t ievt=0; ievt<nEvents; ievt++) {
      clRes->SetValue( mvaValues[ievt], ievt );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// get all the MVA values for the events of the current Data type

std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
{
   Long64_t nEvents = Data()->GetNEvents();
   if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
   if (firstEvt < 0) firstEvt = 0;
   std::vector<Double_t> values(lastEvt-firstEvt);
   // log in case of looping on all the events
   nEvents = values.size();

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   if (logProgress)
      Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
            << "Evaluation of " << GetMethodName() << " on "
            << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
            << " sample (" << nEvents << " events)" << Endl;

   for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
      Data()->SetCurrentEvent(ievt);
      values[ievt-firstEvt] = GetMvaValue(); // index relative to firstEvt, since values has size lastEvt-firstEvt

      // print progress
      if (logProgress) {
         Int_t modulo = Int_t(nEvents/100);
         if (modulo <= 0 ) modulo = 1;
         if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
      }
   }
   if (logProgress) {
      Log() << kINFO //<< Form("Dataset[%s] : ",DataInfo().GetName())
            << "Elapsed time for evaluation of " << nEvents << " events: "
            << timer.GetElapsedTime() << " " << Endl;
   }

   return values;
}
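
// Usage sketch (illustrative): evaluate the trained method on the full test
// sample and histogram the response.
//
//    method->Data()->SetCurrentType(TMVA::Types::kTesting);
//    std::vector<Double_t> mva = method->GetMvaValues(0, method->Data()->GetNEvents(), kTRUE);
//    TH1F h("h", "MVA response", 100, -1, 1);   // adjust the range to the method's output
//    for (Double_t v : mva) h.Fill(v);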

////////////////////////////////////////////////////////////////////////////////
/// prepare tree branch with the method's probability output

void TMVA::MethodBase::AddClassifierOutputProb( Types::ETreeType type )
{
   Data()->SetCurrentType(type);

   ResultsClassification* mvaProb =
      (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );

   Long64_t nEvents = Data()->GetNEvents();

   // use timer
   Timer timer( nEvents, GetName(), kTRUE );

   Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
         << (type==Types::kTraining?"training":"testing") << " sample" << Endl;

   mvaProb->Resize( nEvents );
   for (Int_t ievt=0; ievt<nEvents; ievt++) {

      Data()->SetCurrentEvent(ievt);
      Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
      if (proba < 0) break;
      mvaProb->SetValue( proba, ievt );

      // print progress
      Int_t modulo = Int_t(nEvents/100);
      if (modulo <= 0 ) modulo = 1;
      if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
   }

   Log() << kDEBUG << Form("Dataset[%s] : ",DataInfo().GetName())
         << "Elapsed time for evaluation of " << nEvents << " events: "
         << timer.GetElapsedTime() << " " << Endl;
}

////////////////////////////////////////////////////////////////////////////////
/// calculate the <sum-of-deviation-squared> of the regression output versus
/// the "true" target value from the test sample
///
/// - bias = average deviation
/// - dev  = average absolute deviation
/// - rms  = rms of deviation
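///
/// In terms of the event weights \f$ w_i \f$ and the deviations
/// \f$ d_i = r_i - t_i \f$ between regression estimate and target, the code
/// below computes (stated here for clarity, not an addition to the algorithm):
/// \f[
///   \mathrm{bias} = \frac{\sum_i w_i d_i}{\sum_i w_i}, \qquad
///   \mathrm{dev}  = \frac{\sum_i w_i |d_i|}{\sum_i w_i}, \qquad
///   \mathrm{rms}  = \sqrt{\frac{\sum_i w_i d_i^2}{\sum_i w_i} - \mathrm{bias}^2}
/// \f]
/// The truncated variants (biasT, devT, rmsT) use only events with
/// \f$ d_i \in [\mathrm{bias} - 2\,\mathrm{rms},\ \mathrm{bias} + 2\,\mathrm{rms}] \f$,
/// and mInf/mInfT are the corresponding mutual-information estimates.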

void TMVA::MethodBase::TestRegression( Double_t& bias, Double_t& biasT,
                                       Double_t& dev, Double_t& devT,
                                       Double_t& rms, Double_t& rmsT,
                                       Double_t& mInf, Double_t& mInfT,
                                       Double_t& corr,
                                       Types::ETreeType type )
{
   Types::ETreeType savedType = Data()->GetCurrentType();
   Data()->SetCurrentType(type);

   bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
   Double_t sumw = 0;
   Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
   const Int_t nevt = GetNEvents();
   Float_t* rV = new Float_t[nevt];
   Float_t* tV = new Float_t[nevt];
   Float_t* wV = new Float_t[nevt];
   Float_t xmin = 1e30, xmax = -1e30;
   Log() << kINFO << "Calculate regression for all events" << Endl;
   Timer timer( nevt, GetName(), kTRUE );
   for (Long64_t ievt=0; ievt<nevt; ievt++) {

      const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
      Float_t t = ev->GetTarget(0);
      Float_t w = ev->GetWeight();
      Float_t r = GetRegressionValues()[0];
      Float_t d = (r-t);

      // find min/max
      xmin = TMath::Min(xmin, TMath::Min(t, r));
      xmax = TMath::Max(xmax, TMath::Max(t, r));

      // store for truncated RMS computation
      rV[ievt] = r;
      tV[ievt] = t;
      wV[ievt] = w;

      // compute deviation-squared
      sumw += w;
      bias += w * d;
      dev  += w * TMath::Abs(d);
      rms  += w * d * d;

      // compute correlation between target and regression estimate
      m1  += t*w; s1 += t*t*w;
      m2  += r*w; s2 += r*r*w;
      s12 += t*r;
      if ((ievt & 0xFF) == 0) timer.DrawProgressBar(ievt);
   }
   timer.DrawProgressBar(nevt - 1);
   Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
         << timer.GetElapsedTime() << " " << Endl;

   // standard quantities
   bias /= sumw;
   dev  /= sumw;
   rms  /= sumw;
   rms = TMath::Sqrt(rms - bias*bias);

   // correlation
   m1 /= sumw;
   m2 /= sumw;
   corr = s12/sumw - m1*m2;
   corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );

   // create histogram required for computation of mutual information
   TH2F* hist  = new TH2F( "hist",  "hist",  150, xmin, xmax, 100, xmin, xmax );
   TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );

   // compute truncated RMS and fill histogram
   Double_t devMax = bias + 2*rms;
   Double_t devMin = bias - 2*rms;
   sumw = 0;
   int ic=0;
   for (Long64_t ievt=0; ievt<nevt; ievt++) {
      Float_t d = (rV[ievt] - tV[ievt]);
      hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
      if (d >= devMin && d <= devMax) {
         sumw  += wV[ievt];
         biasT += wV[ievt] * d;
         devT  += wV[ievt] * TMath::Abs(d);
         rmsT  += wV[ievt] * d * d;
         histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
         ic++;
      }
   }
   biasT /= sumw;
   devT  /= sumw;
   rmsT  /= sumw;
   rmsT = TMath::Sqrt(rmsT - biasT*biasT);
   mInf  = gTools().GetMutualInformation( *hist );
   mInfT = gTools().GetMutualInformation( *histT );

   delete hist;
   delete histT;

   delete [] rV;
   delete [] tV;
   delete [] wV;

   Data()->SetCurrentType(savedType);
}

////////////////////////////////////////////////////////////////////////////////
/// test multiclass classification

void TMVA::MethodBase::TestMulticlass()
{
   ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
   if (!resMulticlass) Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "unable to create pointer in TestMulticlass, exiting." << Endl;

   // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
   // Log() << kINFO << Form("Dataset[%s] : ",DataInfo().GetName()) << "Determine optimal multiclass cuts for test data..." << Endl;
   // for (UInt_t icls = 0; icls < DataInfo().GetNClasses(); ++icls) {
   //    resMulticlass->GetBestMultiClassCuts(icls);
   // }

   // Create histograms for use in TMVA GUI
   TString histNamePrefix(GetTestvarName());
   TString histNamePrefixTest{histNamePrefix + "_Test"};
   TString histNamePrefixTrain{histNamePrefix + "_Train"};

   resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);

   resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
   resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
}

////////////////////////////////////////////////////////////////////////////////
/// test the classification method on the test sample and fill the performance
/// histograms (MVA response, probability, rarity, and high-binning efficiency
/// histograms)

void TMVA::MethodBase::TestClassification()
{
   Data()->SetCurrentType(Types::kTesting);

   ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
      ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );

   // sanity checks: tree must exist, and theVar must be in tree
   if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
      Log() << Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
            << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
      Log() << kFATAL << Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
            << " not found in tree" << Endl;
   }

   // basic statistics operations are made in base class
   gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
                         fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );

   // choose reasonable histogram ranges, by removing outliers
   Double_t nrms = 10;
   fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
   fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );

   // determine cut orientation
   fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;

   // fill 2 types of histograms for the various analyses;
   // this one is for actual plotting

   Double_t sxmax = fXmax+0.00001;

   // classifier response distributions for training sample
   // MVA plots used for graphics representation (signal)
   TString TestvarName;
   if (IsSilentFile()) {
      TestvarName = Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
   } else {
      TestvarName = GetTestvarName();
   }
   TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
   TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
   mvaRes->Store(mva_s, "MVA_S");
   mvaRes->Store(mva_b, "MVA_B");
   mva_s->Sumw2();
   mva_b->Sumw2();

   TH1* proba_s = 0;
   TH1* proba_b = 0;
   TH1* rarity_s = 0;
   TH1* rarity_b = 0;
   if (HasMVAPdfs()) {
      // P(MVA) plots used for graphics representation
      proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
      proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
      mvaRes->Store(proba_s, "Prob_S");
      mvaRes->Store(proba_b, "Prob_B");
      proba_s->Sumw2();
      proba_b->Sumw2();

      // R(MVA) plots used for graphics representation
      rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
      rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
      mvaRes->Store(rarity_s, "Rar_S");
      mvaRes->Store(rarity_b, "Rar_B");
      rarity_s->Sumw2();
      rarity_b->Sumw2();
   }

   // MVA plots used for efficiency calculations (large number of bins)
   TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
   TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
   mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
   mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
   mva_eff_s->Sumw2();
   mva_eff_b->Sumw2();

   // fill the histograms

   ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
      (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );

   Log() << kHEADER << Form("[%s] : ",DataInfo().GetName()) << "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
   if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
   std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();

   // LM: this is needed to avoid crashes in the ROC curve calculation
   if ( mvaRes->GetSize() != GetNEvents() ) {
      Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
      assert(mvaRes->GetSize() == GetNEvents());
   }

   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {

      const Event* ev = GetEvent(ievt);
      Float_t v = (*mvaRes)[ievt][0];
      Float_t w = ev->GetWeight();

      if (DataInfo().IsSignal(ev)) {
         mvaResTypes->push_back(kTRUE);
         mva_s->Fill( v, w );
         if (mvaProb) {
            proba_s->Fill( (*mvaProb)[ievt][0], w );
            rarity_s->Fill( GetRarity( v ), w );
         }

         mva_eff_s->Fill( v, w );
      }
      else {
         mvaResTypes->push_back(kFALSE);
         mva_b->Fill( v, w );
         if (mvaProb) {
            proba_b->Fill( (*mvaProb)[ievt][0], w );
            rarity_b->Fill( GetRarity( v ), w );
         }
         mva_eff_b->Fill( v, w );
      }
   }

   // comment out these calls (and several others) if you want unnormalized output
   gTools().NormHist( mva_s  );
   gTools().NormHist( mva_b  );
   gTools().NormHist( proba_s );
   gTools().NormHist( proba_b );
   gTools().NormHist( rarity_s );
   gTools().NormHist( rarity_b );
   gTools().NormHist( mva_eff_s );
   gTools().NormHist( mva_eff_b );

   // create PDFs from histograms, using default splines, and no additional smoothing
   if (fSplS) { delete fSplS; fSplS = 0; }
   if (fSplB) { delete fSplB; fSplB = 0; }
   fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
   fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
}

////////////////////////////////////////////////////////////////////////////////
/// general method used in writing the header of the weight files where
/// the used variables, variable transformation type etc. is specified

void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
{
   TString prefix = "";
   UserGroup_t * userInfo = gSystem->GetUserInfo();

   tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
   tf << prefix << "Method         : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
   tf.setf(std::ios::left);
   tf << prefix << "TMVA Release   : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
      << GetTrainingTMVAVersionCode() << "]" << std::endl;
   tf << prefix << "ROOT Release   : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
      << GetTrainingROOTVersionCode() << "]" << std::endl;
   tf << prefix << "Creator        : " << userInfo->fUser << std::endl;
   tf << prefix << "Date           : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
   tf << prefix << "Host           : " << gSystem->GetBuildNode() << std::endl;
   tf << prefix << "Dir            : " << gSystem->WorkingDirectory() << std::endl;
   tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;

   tf << prefix << "Analysis type  : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
   tf << prefix << std::endl;

   delete userInfo;

   // First write all options
   tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
   WriteOptionsToStream( tf, prefix );
   tf << prefix << std::endl;

   // Second write variable info
   tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
   WriteVarsToStream( tf, prefix );
   tf << prefix << std::endl;
}

////////////////////////////////////////////////////////////////////////////////
/// xml writing

void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
{
   void* it = gTools().AddChild(gi,"Info");
   gTools().AddAttr(it,"name", name);
   gTools().AddAttr(it,"value", value);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::MethodBase::AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType ) {
   if (analysisType == Types::kRegression) {
      AddRegressionOutput( type );
   } else if (analysisType == Types::kMulticlass) {
      AddMulticlassOutput( type );
   } else {
      AddClassifierOutput( type );
      if (HasMVAPdfs())
         AddClassifierOutputProb( type );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// general method used in writing the header of the weight files where
/// the used variables, variable transformation type etc. is specified

void TMVA::MethodBase::WriteStateToXML( void* parent ) const
{
   if (!parent) return;

   UserGroup_t* userInfo = gSystem->GetUserInfo();

   void* gi = gTools().AddChild(parent, "GeneralInfo");
   AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
   AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
   AddInfoItem( gi, "Creator", userInfo->fUser);
   TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
   AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
   AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
   AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
   AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));

   Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
   TString analysisType((aType==Types::kRegression) ? "Regression" :
                        (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
   AddInfoItem( gi, "AnalysisType", analysisType );
   delete userInfo;

   // write options
   AddOptionsXMLTo( parent );

   // write variable info
   AddVarsXMLTo( parent );

   // write spectator info
   if (fModelPersistence)
      AddSpectatorsXMLTo( parent );

   // write class info if in multiclass mode
   AddClassesXMLTo(parent);

   // write target info if in regression mode
   if (DoRegression()) AddTargetsXMLTo(parent);

   // write transformations
   GetTransformationHandler(false).AddXMLTo( parent );

   // write MVA variable distributions
   void* pdfs = gTools().AddChild(parent, "MVAPdfs");
   if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
   if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);

   // write weights
   AddWeightsXMLTo( parent );
}

////////////////////////////////////////////////////////////////////////////////
/// read reference MVA distributions (and other information)
/// from a ROOT-type weight file

void TMVA::MethodBase::ReadStateFromStream( TFile& rf )
{
   Bool_t addDirStatus = TH1::AddDirectoryStatus();
   TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
   fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
   fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );

   TH1::AddDirectory( addDirStatus );

   ReadWeightsFromStream( rf );

   SetTestvarName();
}

////////////////////////////////////////////////////////////////////////////////
/// write options and weights to file;
/// the method state is saved as an XML weight file

void TMVA::MethodBase::WriteStateToFile() const
{
   // get the weight file name (a .txt extension is replaced by .xml)
   TString tfname( GetWeightFileName() );

   // writing xml file
   TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
   Log() << kINFO //<< Form("Dataset[%s] : ",DataInfo().GetName())
         << "Creating xml weight file: "
         << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
   void* doc = gTools().xmlengine().NewDoc();
   void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
   gTools().xmlengine().DocSetRootElement(doc,rootnode);
   gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
   WriteStateToXML(rootnode);
   gTools().xmlengine().SaveDoc(doc,xmlfname);
   gTools().xmlengine().FreeDoc(doc);
}
1413 
1414 ////////////////////////////////////////////////////////////////////////////////
1415 /// Function to write options and weights to file
1416 
1417 void TMVA::MethodBase::ReadStateFromFile()
1418 {
1419  // get the filename
1420 
1421  TString tfname(GetWeightFileName());
1422 
1423  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1424  << "Reading weight file: "
1425  << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1426 
1427  if (tfname.EndsWith(".xml") ) {
1428  void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1429  if (!doc) {
1430  Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1431  }
1432  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1433  ReadStateFromXML(rootnode);
1434  gTools().xmlengine().FreeDoc(doc);
1435  }
1436  else {
1437  std::filebuf fb;
1438  fb.open(tfname.Data(),std::ios::in);
1439  if (!fb.is_open()) { // file not found --> Error
1440  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1441  << "Unable to open input weight file: " << tfname << Endl;
1442  }
1443  std::istream fin(&fb);
1444  ReadStateFromStream(fin);
1445  fb.close();
1446  }
1447  if (!fTxtWeightsOnly) {
1448  // ---- read the ROOT file
1449  TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1450  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1451  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1452  TFile* rfile = TFile::Open( rfname, "READ" );
1453  ReadStateFromStream( *rfile );
1454  rfile->Close();
1455  }
1456 }
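
// A minimal usage sketch: in a typical application the weight file is read
// back through the Reader, which ends up calling ReadStateFromFile(); the
// file name and variables below are hypothetical:
//
//    TMVA::Reader reader( "!Color:!Silent" );
//    Float_t var1, var2;
//    reader.AddVariable( "var1", &var1 );
//    reader.AddVariable( "var2", &var2 );
//    reader.BookMVA( "BDT", "dataset/weights/TMVAClassification_BDT.weights.xml" );
//    var1 = 0.3; var2 = -1.2;
//    Double_t mvaValue = reader.EvaluateMVA( "BDT" );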
1457 ////////////////////////////////////////////////////////////////////////////////
1458 /// for reading from memory
1459 
1460 void TMVA::MethodBase::ReadStateFromXMLString( const char* xmlstr ) {
1461  void* doc = gTools().xmlengine().ParseString(xmlstr);
1462  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1463  ReadStateFromXML(rootnode);
1464  gTools().xmlengine().FreeDoc(doc);
1465 
1466  return;
1467 }
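
// Sketch of booking from memory, e.g. when the weight-file XML is kept in a
// database rather than on disk; the loading of the string is schematic and
// "method" stands for a pointer to a concrete MethodBase instance:
//
//    std::ifstream in( "weights.xml" );
//    std::string xml( (std::istreambuf_iterator<char>( in )),
//                      std::istreambuf_iterator<char>() );
//    method->ReadStateFromXMLString( xml.c_str() );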
1468 
1469 ////////////////////////////////////////////////////////////////////////////////
1470 
1471 void TMVA::MethodBase::ReadStateFromXML( void* methodNode )
1472 {
1473 
1474  TString fullMethodName;
1475  gTools().ReadAttr( methodNode, "Method", fullMethodName );
1476 
1477  fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1478 
1479  // update logger
1480  Log().SetSource( GetName() );
1481  Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1482  << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1483 
1484  // after the method name is read, the testvar can be set
1485  SetTestvarName();
1486 
1487  TString nodeName("");
1488  void* ch = gTools().GetChild(methodNode);
1489  while (ch!=0) {
1490  nodeName = TString( gTools().GetName(ch) );
1491 
1492  if (nodeName=="GeneralInfo") {
1493  // read analysis type
1494 
1495  TString name(""),val("");
1496  void* antypeNode = gTools().GetChild(ch);
1497  while (antypeNode) {
1498  gTools().ReadAttr( antypeNode, "name", name );
1499 
1500  if (name == "TrainingTime")
1501  gTools().ReadAttr( antypeNode, "value", fTrainTime );
1502 
1503  if (name == "AnalysisType") {
1504  gTools().ReadAttr( antypeNode, "value", val );
1505  val.ToLower();
1506  if (val == "regression" ) SetAnalysisType( Types::kRegression );
1507  else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1508  else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1509  else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1510  }
1511 
1512  if (name == "TMVA Release" || name == "TMVA") {
1513  TString s;
1514  gTools().ReadAttr( antypeNode, "value", s);
1515  fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1516  Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1517  }
1518 
1519  if (name == "ROOT Release" || name == "ROOT") {
1520  TString s;
1521  gTools().ReadAttr( antypeNode, "value", s);
1522  fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1523  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1524  << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1525  }
1526  antypeNode = gTools().GetNextChild(antypeNode);
1527  }
1528  }
1529  else if (nodeName=="Options") {
1530  ReadOptionsFromXML(ch);
1531  ParseOptions();
1532 
1533  }
1534  else if (nodeName=="Variables") {
1535  ReadVariablesFromXML(ch);
1536  }
1537  else if (nodeName=="Spectators") {
1538  ReadSpectatorsFromXML(ch);
1539  }
1540  else if (nodeName=="Classes") {
1541  if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1542  }
1543  else if (nodeName=="Targets") {
1544  if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1545  }
1546  else if (nodeName=="Transformations") {
1547  GetTransformationHandler().ReadFromXML(ch);
1548  }
1549  else if (nodeName=="MVAPdfs") {
1550  TString pdfname;
1551  if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1552  if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1553  void* pdfnode = gTools().GetChild(ch);
1554  if (pdfnode) {
1555  gTools().ReadAttr(pdfnode, "Name", pdfname);
1556  fMVAPdfS = new PDF(pdfname);
1557  fMVAPdfS->ReadXML(pdfnode);
1558  pdfnode = gTools().GetNextChild(pdfnode);
1559  gTools().ReadAttr(pdfnode, "Name", pdfname);
1560  fMVAPdfB = new PDF(pdfname);
1561  fMVAPdfB->ReadXML(pdfnode);
1562  }
1563  }
1564  else if (nodeName=="Weights") {
1565  ReadWeightsFromXML(ch);
1566  }
1567  else {
1568  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1569  }
1570  ch = gTools().GetNextChild(ch);
1571 
1572  }
1573 
1574  // update transformation handler
1575  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1576 }
1577 
1578 ////////////////////////////////////////////////////////////////////////////////
1579 /// read the header from the weight files of the different MVA methods
1580 
1581 void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
1582 {
1583  char buf[512];
1584 
1585  // when reading from stream, we assume the files are produced with TMVA<=397
1586  SetAnalysisType(Types::kClassification);
1587 
1588 
1589  // first read the method name
1590  GetLine(fin,buf);
1591  while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1592  TString namestr(buf);
1593 
1594  TString methodType = namestr(0,namestr.Index("::"));
1595  methodType = methodType(methodType.Last(' '),methodType.Length());
1596  methodType = methodType.Strip(TString::kLeading);
1597 
1598  TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1599  methodName = methodName.Strip(TString::kLeading);
1600  if (methodName == "") methodName = methodType;
1601  fMethodName = methodName;
1602 
1603  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1604 
1605  // update logger
1606  Log().SetSource( GetName() );
1607 
1608  // the question is whether to read the variables first or the options
1609  // (the order must, of course, agree with the order in which they were written)
1610  //
1611  // the option "Decorrelation" is needed to decide whether the variables we
1612  // read are decorrelated or not
1613  //
1614  // the variables are needed by some methods (TMLP) to build the NN, which is
1615  // done in ProcessOptions; therefore we first read and parse the options,
1616  // then read the variables, and finally process the options
1617 
1618  // now read all options
1619  GetLine(fin,buf);
1620  while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1621  ReadOptionsFromStream(fin);
1622  ParseOptions();
1623 
1624  // Now read variable info
1625  fin.getline(buf,512);
1626  while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1627  ReadVarsFromStream(fin);
1628 
1629  // now we process the options (of the derived class)
1630  ProcessOptions();
1631 
1632  if (IsNormalised()) {
1633  VariableNormalizeTransform* norm = (VariableNormalizeTransform*)
1634  GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1635  norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1636  }
1637  VariableTransformBase *varTrafo(0), *varTrafo2(0);
1638  if ( fVarTransformString == "None") {
1639  if (fUseDecorr)
1640  varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1641  } else if ( fVarTransformString == "Decorrelate" ) {
1642  varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1643  } else if ( fVarTransformString == "PCA" ) {
1644  varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1645  } else if ( fVarTransformString == "Uniform" ) {
1646  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1647  } else if ( fVarTransformString == "Gauss" ) {
1648  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1649  } else if ( fVarTransformString == "GaussDecorr" ) {
1650  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1651  varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1652  } else {
1653  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1654  << fVarTransformString << "' unknown." << Endl;
1655  }
1656  // Now read decorrelation matrix if available
1657  if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1658  fin.getline(buf,512);
1659  while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1660  if (varTrafo) {
1661  TString trafo(fVariableTransformTypeString); trafo.ToLower();
1662  varTrafo->ReadTransformationFromStream(fin, trafo );
1663  }
1664  if (varTrafo2) {
1665  TString trafo(fVariableTransformTypeString); trafo.ToLower();
1666  varTrafo2->ReadTransformationFromStream(fin, trafo );
1667  }
1668  }
1669 
1670 
1671  if (HasMVAPdfs()) {
1672  // Now read the MVA PDFs
1673  fin.getline(buf,512);
1674  while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1675  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1676  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1677  fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1678  fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1679  fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1680  fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1681 
1682  fin >> *fMVAPdfS;
1683  fin >> *fMVAPdfB;
1684  }
1685 
1686  // Now read weights
1687  fin.getline(buf,512);
1688  while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1689  fin.getline(buf,512);
1690  ReadWeightsFromStream( fin );
1691 
1692  // update transformation handler
1693  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1694 
1695 }
1696 
1697 ////////////////////////////////////////////////////////////////////////////////
1698 /// write the list of variables (name, min, max) for a given data
1699 /// transformation method to the stream
1700 
1701 void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1702 {
1703  o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1704  std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1705  for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1706  o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1707  varIt = DataInfo().GetSpectatorInfos().begin();
1708  for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1709 }
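
// The text block written here looks schematically as follows (the per-line
// variable format is defined by VariableInfo::WriteToStream):
//
//    NVar 2
//    <variable line written by VariableInfo::WriteToStream>
//    <variable line written by VariableInfo::WriteToStream>
//    NSpec 1
//    <spectator line written by VariableInfo::WriteToStream>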
1710 
1711 ////////////////////////////////////////////////////////////////////////////////
1712 /// Read the variables (name, min, max) for a given data
1713 /// transformation method from the stream. In the stream we only
1714 /// expect the limits which will be set
1715 
1716 void TMVA::MethodBase::ReadVarsFromStream( std::istream& istr )
1717 {
1718  TString dummy;
1719  UInt_t readNVar;
1720  istr >> dummy >> readNVar;
1721 
1722  if (readNVar!=DataInfo().GetNVariables()) {
1723  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1724  << " while there are " << readNVar << " variables declared in the file"
1725  << Endl;
1726  }
1727 
1728  // we want to make sure all variables are read in the order they are defined
1729  VariableInfo varInfo;
1730  std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1731  int varIdx = 0;
1732  for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1733  varInfo.ReadFromStream(istr);
1734  if (varIt->GetExpression() == varInfo.GetExpression()) {
1735  varInfo.SetExternalLink((*varIt).GetExternalLink());
1736  (*varIt) = varInfo;
1737  }
1738  else {
1739  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1740  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1741  Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1742  Log() << kINFO << "the correct working of the method):" << Endl;
1743  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1744  Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1745  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1746  }
1747  }
1748 }
1749 
1750 ////////////////////////////////////////////////////////////////////////////////
1751 /// write variable info to XML
1752 
1753 void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1754 {
1755  void* vars = gTools().AddChild(parent, "Variables");
1756  gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1757 
1758  for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1759  VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1760  void* var = gTools().AddChild( vars, "Variable" );
1761  gTools().AddAttr( var, "VarIndex", idx );
1762  vi.AddToXML( var );
1763  }
1764 }
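
// Schematic result in the weight file; the per-variable attributes beyond
// VarIndex are added by VariableInfo::AddToXML and are only sketched here:
//
//    <Variables NVar="2">
//       <Variable VarIndex="0" Expression="var1" ... />
//       <Variable VarIndex="1" Expression="var2" ... />
//    </Variables>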
1765 
1766 ////////////////////////////////////////////////////////////////////////////////
1767 /// write spectator info to XML
1768 
1769 void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const
1770 {
1771  void* specs = gTools().AddChild(parent, "Spectators");
1772 
1773  UInt_t writeIdx=0;
1774  for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1775 
1776  VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1777 
1778  // we do not want to write spectators that are category-cuts,
1779  // except if the method is the category method and the spectators belong to it
1780  if (vi.GetVarType()=='C') continue;
1781 
1782  void* spec = gTools().AddChild( specs, "Spectator" );
1783  gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1784  vi.AddToXML( spec );
1785  }
1786  gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1787 }
1788 
1789 ////////////////////////////////////////////////////////////////////////////////
1790 /// write class info to XML
1791 
1792 void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1793 {
1794  UInt_t nClasses=DataInfo().GetNClasses();
1795 
1796  void* classes = gTools().AddChild(parent, "Classes");
1797  gTools().AddAttr( classes, "NClass", nClasses );
1798 
1799  for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1800  ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1801  TString className =classInfo->GetName();
1802  UInt_t classNumber=classInfo->GetNumber();
1803 
1804  void* classNode=gTools().AddChild(classes, "Class");
1805  gTools().AddAttr( classNode, "Name", className );
1806  gTools().AddAttr( classNode, "Index", classNumber );
1807  }
1808 }
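
// Schematic result in the weight file, e.g. for a two-class problem:
//
//    <Classes NClass="2">
//       <Class Name="Signal" Index="0"/>
//       <Class Name="Background" Index="1"/>
//    </Classes>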
1809 ////////////////////////////////////////////////////////////////////////////////
1810 /// write target info to XML
1811 
1812 void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1813 {
1814  void* targets = gTools().AddChild(parent, "Targets");
1815  gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1816 
1817  for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1818  VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1819  void* tar = gTools().AddChild( targets, "Target" );
1820  gTools().AddAttr( tar, "TargetIndex", idx );
1821  vi.AddToXML( tar );
1822  }
1823 }
1824 
1825 ////////////////////////////////////////////////////////////////////////////////
1826 /// read variable info from XML
1827 
1828 void TMVA::MethodBase::ReadVariablesFromXML( void* varnode )
1829 {
1830  UInt_t readNVar;
1831  gTools().ReadAttr( varnode, "NVar", readNVar);
1832 
1833  if (readNVar!=DataInfo().GetNVariables()) {
1834  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1835  << " while there are " << readNVar << " variables declared in the file"
1836  << Endl;
1837  }
1838 
1839  // we want to make sure all variables are read in the order they are defined
1840  VariableInfo readVarInfo, existingVarInfo;
1841  int varIdx = 0;
1842  void* ch = gTools().GetChild(varnode);
1843  while (ch) {
1844  gTools().ReadAttr( ch, "VarIndex", varIdx);
1845  existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1846  readVarInfo.ReadFromXML(ch);
1847 
1848  if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1849  readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1850  existingVarInfo = readVarInfo;
1851  }
1852  else {
1853  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1854  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1855  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1856  Log() << kINFO << "correct working of the method):" << Endl;
1857  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1858  Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1859  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1860  }
1861  ch = gTools().GetNextChild(ch);
1862  }
1863 }
1864 
1865 ////////////////////////////////////////////////////////////////////////////////
1866 /// read spectator info from XML
1867 
1868 void TMVA::MethodBase::ReadSpectatorsFromXML( void* specnode )
1869 {
1870  UInt_t readNSpec;
1871  gTools().ReadAttr( specnode, "NSpec", readNSpec);
1872 
1873  if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1874  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1875  << " while there are " << readNSpec << " spectators declared in the file"
1876  << Endl;
1877  }
1878 
1879  // we want to make sure all variables are read in the order they are defined
1880  VariableInfo readSpecInfo, existingSpecInfo;
1881  int specIdx = 0;
1882  void* ch = gTools().GetChild(specnode);
1883  while (ch) {
1884  gTools().ReadAttr( ch, "SpecIndex", specIdx);
1885  existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1886  readSpecInfo.ReadFromXML(ch);
1887 
1888  if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1889  readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1890  existingSpecInfo = readSpecInfo;
1891  }
1892  else {
1893  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1894  Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1895  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1896  Log() << kINFO << "correct working of the method):" << Endl;
1897  Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1898  Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1899  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1900  }
1901  ch = gTools().GetNextChild(ch);
1902  }
1903 }
1904 
1905 ////////////////////////////////////////////////////////////////////////////////
1906 /// read number of classes from XML
1907 
1908 void TMVA::MethodBase::ReadClassesFromXML( void* clsnode )
1909 {
1910  UInt_t readNCls;
1911  // coverity[tainted_data_argument]
1912  gTools().ReadAttr( clsnode, "NClass", readNCls);
1913 
1914  TString className="";
1915  UInt_t classIndex=0;
1916  void* ch = gTools().GetChild(clsnode);
1917  if (!ch) {
1918  for (UInt_t icls = 0; icls<readNCls;++icls) {
1919  TString classname = Form("class%i",icls);
1920  DataInfo().AddClass(classname);
1921 
1922  }
1923  }
1924  else{
1925  while (ch) {
1926  gTools().ReadAttr( ch, "Index", classIndex);
1927  gTools().ReadAttr( ch, "Name", className );
1928  DataInfo().AddClass(className);
1929 
1930  ch = gTools().GetNextChild(ch);
1931  }
1932  }
1933 
1934  // retrieve signal and background class index
1935  if (DataInfo().GetClassInfo("Signal") != 0) {
1936  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1937  }
1938  else
1939  fSignalClass=0;
1940  if (DataInfo().GetClassInfo("Background") != 0) {
1941  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1942  }
1943  else
1944  fBackgroundClass=1;
1945 }
1946 
1947 ////////////////////////////////////////////////////////////////////////////////
1948 /// read target info from XML
1949 
1950 void TMVA::MethodBase::ReadTargetsFromXML( void* tarnode )
1951 {
1952  UInt_t readNTar;
1953  gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1954 
1955  int tarIdx = 0;
1956  TString expression;
1957  void* ch = gTools().GetChild(tarnode);
1958  while (ch) {
1959  gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1960  gTools().ReadAttr( ch, "Expression", expression);
1961  DataInfo().AddTarget(expression,"","",0,0);
1962 
1963  ch = gTools().GetNextChild(ch);
1964  }
1965 }
1966 
1967 ////////////////////////////////////////////////////////////////////////////////
1968 /// returns the ROOT directory where info/histograms etc of the
1969 /// corresponding MVA method instance are stored
1970 
1971 TDirectory* TMVA::MethodBase::BaseDir() const
1972 {
1973  if (fBaseDir != 0) return fBaseDir;
1974  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1975 
1976  if (IsSilentFile()) {
1977  Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
1978  << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1979  "output when creating the factory"
1980  << Endl;
1981  }
1982 
1983  TDirectory* methodDir = MethodBaseDir();
1984  if (methodDir==0)
1985  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() returned a NULL pointer!" << Endl;
1986 
1987  TString defaultDir = GetMethodName();
1988  TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1989  if(!sdir)
1990  {
1991  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet --> creating it" <<Endl;
1992  sdir = methodDir->mkdir(defaultDir);
1993  sdir->cd();
1994  // write weight file name into target file
1995  if (fModelPersistence) {
1996  TObjString wfilePath( gSystem->WorkingDirectory() );
1997  TObjString wfileName( GetWeightFileName() );
1998  wfilePath.Write( "TrainingPath" );
1999  wfileName.Write( "WeightFileName" );
2000  }
2001  }
2002 
2003  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " exists, returning it" <<Endl;
2004  return sdir;
2005 }
2006 
2007 ////////////////////////////////////////////////////////////////////////////////
2008 /// returns the ROOT directory where all instances of the
2009 /// corresponding MVA method are stored
2010 
2011 TDirectory *TMVA::MethodBase::MethodBaseDir() const
2012 {
2013  if (fMethodBaseDir != 0) {
2014  return fMethodBaseDir;
2015  }
2016 
2017  const char *datasetName = DataInfo().GetName();
2018 
2019  Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2020  << " not set yet --> check if already there.." << Endl;
2021 
2022  TDirectory *factoryBaseDir = GetFile();
2023  if (!factoryBaseDir) return nullptr;
2024  fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2025  if (!fMethodBaseDir) {
2026  fMethodBaseDir = factoryBaseDir->mkdir(datasetName, Form("Base directory for dataset %s", datasetName));
2027  if (!fMethodBaseDir) {
2028  Log() << kFATAL << "Cannot create directory " << datasetName << Endl;
2029  }
2030  }
2031  TString methodTypeDir = Form("Method_%s", GetMethodTypeName().Data());
2032  fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2033 
2034  if (!fMethodBaseDir) {
2035  TDirectory *datasetDir = factoryBaseDir->GetDirectory(datasetName);
2036  TString methodTypeDirHelpStr = Form("Directory for all %s methods", GetMethodTypeName().Data());
2037  fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2038  Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2039  << " does not exist yet--> created it" << Endl;
2040  }
2041 
2042  Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2043  << "Return from MethodBaseDir() after creating base directory " << Endl;
2044  return fMethodBaseDir;
2045 }
2046 
2047 ////////////////////////////////////////////////////////////////////////////////
2048 /// set directory of weight file
2049 
2050 void TMVA::MethodBase::SetWeightFileDir( TString fileDir )
2051 {
2052  fFileDir = fileDir;
2053  gSystem->mkdir( fFileDir, kTRUE );
2054 }
2055 
2056 ////////////////////////////////////////////////////////////////////////////////
2057 /// set the weight file name (deprecated)
2058 
2059 void TMVA::MethodBase::SetWeightFileName( TString theWeightFile)
2060 {
2061  fWeightFile = theWeightFile;
2062 }
2063 
2064 ////////////////////////////////////////////////////////////////////////////////
2065 /// retrieve weight file name
2066 
2067 TString TMVA::MethodBase::GetWeightFileName() const
2068 {
2069  if (fWeightFile!="") return fWeightFile;
2070 
2071  // the default consists of
2072  // directory/jobname_methodname_suffix.extension.xml
2073  TString suffix = "";
2074  TString wFileDir(GetWeightFileDir());
2075  TString wFileName = GetJobName() + "_" + GetMethodName() +
2076  suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2077  if (wFileDir.IsNull() ) return wFileName;
2078  // add the weight file directory if it is not null
2079  return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2080  + wFileName );
2081 }
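
// Example of the composed default, assuming the standard IONames (weight file
// extension "weights"), job name "TMVAClassification" and method name "BDT":
//
//    <weight-file dir>/TMVAClassification_BDT.weights.xml
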
2082 ////////////////////////////////////////////////////////////////////////////////
2083 /// writes all MVA evaluation histograms to file
2084 
2085 void TMVA::MethodBase::WriteEvaluationHistosToFile(Types::ETreeType treetype)
2086 {
2087  BaseDir()->cd();
2088 
2089 
2090  // write MVA PDFs to file - if they exist
2091  if (0 != fMVAPdfS) {
2092  fMVAPdfS->GetOriginalHist()->Write();
2093  fMVAPdfS->GetSmoothedHist()->Write();
2094  fMVAPdfS->GetPDFHist()->Write();
2095  }
2096  if (0 != fMVAPdfB) {
2097  fMVAPdfB->GetOriginalHist()->Write();
2098  fMVAPdfB->GetSmoothedHist()->Write();
2099  fMVAPdfB->GetPDFHist()->Write();
2100  }
2101 
2102  // write result-histograms
2103  Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2104  if (!results)
2105  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2106  << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2107  << "/kMaxAnalysisType" << Endl;
2108  results->GetStorage()->Write();
2109  if (treetype==Types::kTesting) {
2110  // skipping plotting of variables if too many (default is 200)
2111  if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2112  GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2113  else
2114  Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2115  << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2116  << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2117  }
2118 }
2119 
2120 ////////////////////////////////////////////////////////////////////////////////
2121 /// write special monitoring histograms to file
2122 /// (dummy implementation here)
2123 
2124 void TMVA::MethodBase::WriteMonitoringHistosToFile( void ) const
2125 {
2126 }
2127 
2128 ////////////////////////////////////////////////////////////////////////////////
2129 /// reads one line from the input stream
2130 /// checks for certain keywords and interprets
2131 /// the line if keywords are found
2132 
2133 Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2134 {
2135  fin.getline(buf,512);
2136  TString line(buf);
2137  if (line.BeginsWith("TMVA Release")) {
2138  Ssiz_t start = line.First('[')+1;
2139  Ssiz_t length = line.Index("]",start)-start;
2140  TString code = line(start,length);
2141  std::stringstream s(code.Data());
2142  s >> fTMVATrainingVersion;
2143  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2144  }
2145  if (line.BeginsWith("ROOT Release")) {
2146  Ssiz_t start = line.First('[')+1;
2147  Ssiz_t length = line.Index("]",start)-start;
2148  TString code = line(start,length);
2149  std::stringstream s(code.Data());
2150  s >> fROOTTrainingVersion;
2151  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2152  }
2153  if (line.BeginsWith("Analysis type")) {
2154  Ssiz_t start = line.First('[')+1;
2155  Ssiz_t length = line.Index("]",start)-start;
2156  TString code = line(start,length);
2157  std::stringstream s(code.Data());
2158  std::string analysisType;
2159  s >> analysisType;
2160  if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2161  else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2162  else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2163  else Log() << kFATAL << "Analysis type " << analysisType << " from weight file not known!" << Endl;
2164 
2165  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2166  << (GetAnalysisType() == Types::kRegression ? "Regression" :
2167  (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2168  }
2169 
2170  return true;
2171 }
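
// Example header lines this parser recognises (version strings are examples;
// the integer in square brackets is the version code that gets extracted):
//
//    TMVA Release   : 4.2.1 [262657]
//    ROOT Release   : 6.30/04 [399364]
//    Analysis type  : [Classification]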
2172 
2173 ////////////////////////////////////////////////////////////////////////////////
2174 /// Create PDFs of the MVA output variables
2175 
2176 void TMVA::MethodBase::CreateMVAPdfs()
2177 {
2178  Data()->SetCurrentType(Types::kTraining);
2179 
2180  // the PDFs are stored as results ONLY if the corresponding "results" are booked,
2181  // otherwise they are only used 'online'
2182  ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2183  ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2184 
2185  if (mvaRes==0 || mvaRes->GetSize()==0) {
2186  Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2187  }
2188 
2189  Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2190  Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2191 
2192  // create histograms that serve as basis to create the MVA Pdfs
2193  TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2194  fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2195  TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2196  fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2197 
2198 
2199  // compute sum of weights properly
2200  histMVAPdfS->Sumw2();
2201  histMVAPdfB->Sumw2();
2202 
2203  // fill histograms
2204  for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2205  Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2206  Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2207 
2208  if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2209  else histMVAPdfB->Fill( theVal, theWeight );
2210  }
2211 
2212  gTools().NormHist( histMVAPdfS );
2213  gTools().NormHist( histMVAPdfB );
2214 
2215  // momentary hack for ROOT problem
2216  if(!IsSilentFile())
2217  {
2218  histMVAPdfS->Write();
2219  histMVAPdfB->Write();
2220  }
2221  // create PDFs
2222  fMVAPdfS->BuildPDF ( histMVAPdfS );
2223  fMVAPdfB->BuildPDF ( histMVAPdfB );
2224  fMVAPdfS->ValidatePDF( histMVAPdfS );
2225  fMVAPdfB->ValidatePDF( histMVAPdfB );
2226 
2227  if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2228  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2229  << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2230  GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2231  << Endl;
2232  }
2233 
2234  delete histMVAPdfS;
2235  delete histMVAPdfB;
2236 }
2237 
2238 Double_t TMVA::MethodBase::GetProba(const Event *ev){
2239  // the simple version: automatically calculates the MVA value and uses the
2240  // SAME sig/bkg ratio as given in the training sample (typically 50/50,
2241  // i.e. NormMode=EqualNumEvents, but it can be different)
2242  if (!fMVAPdfS || !fMVAPdfB) {
2243  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2244  CreateMVAPdfs();
2245  }
2246  Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
2247  Double_t mvaVal = GetMvaValue(ev);
2248 
2249  return GetProba(mvaVal,sigFraction);
2250 
2251 }
2252 ////////////////////////////////////////////////////////////////////////////////
2253 /// compute likelihood ratio
2254 
2255 Double_t TMVA::MethodBase::GetProba( Double_t mvaVal, Double_t ap_sig )
2256 {
2257  if (!fMVAPdfS || !fMVAPdfB) {
2258  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2259  return -1.0;
2260  }
2261  Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2262  Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2263 
2264  Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2265 
2266  return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2267 }
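
// Worked example of the ratio above: with PDF values p_s = 2.0 and p_b = 0.5
// at the given MVA value, and a signal prior ap_sig = 0.5,
//
//    P(S|x) = (2.0 * 0.5) / (2.0 * 0.5 + 0.5 * 0.5) = 1.0 / 1.25 = 0.8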
2268 
2269 ////////////////////////////////////////////////////////////////////////////////
2270 /// compute rarity:
2271 /// \f[
2272 /// R(x) = \int_{-\infty}^{x} PDF(x') \, dx'
2273 /// \f]
2274 /// where PDF(x) is the PDF of the classifier's signal or background distribution
2275 
2276 Double_t TMVA::MethodBase::GetRarity( Double_t mvaVal, Types::ESBType reftype ) const
2277 {
2278  if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2279  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2280  << "select option \"CreateMVAPdfs\"" << Endl;
2281  return 0.0;
2282  }
2283 
2284  PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2285 
2286  return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2287 }
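
// Usage sketch: by the probability integral transform, the rarity of events
// drawn from the reference distribution is uniform in [0,1]; for a classifier
// with signal at large MVA values, signal events therefore cluster near 1:
//
//    Double_t r = method->GetRarity( mvaValue, TMVA::Types::kBackground );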
2288 
2289 ////////////////////////////////////////////////////////////////////////////////
2290 /// fill background efficiency (resp. rejection) versus signal efficiency plots
2291 /// returns signal efficiency at background efficiency indicated in theString
2292 
2293 Double_t TMVA::MethodBase::GetEfficiency( const TString& theString, Types::ETreeType type,Double_t& effSerr )
2294 {
2295  Data()->SetCurrentType(type);
2296  Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2297  std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2298 
2299  // parse input string for required background efficiency
2300  TList* list = gTools().ParseFormatLine( theString );
2301 
2302  // sanity check
2303  Bool_t computeArea = kFALSE;
2304  if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2305  else if (list->GetSize() > 2) {
2306  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2307  << " in string: " << theString
2308  << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2309  delete list;
2310  return -1;
2311  }
2312 
2313  // sanity check
2314  if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2315  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2316  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2317  delete list;
2318  return -1.0;
2319  }
2320 
2321  // create histograms
2322 
2323  // first, get efficiency histograms for signal and background
2324  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2325  Double_t xmin = effhist->GetXaxis()->GetXmin();
2326  Double_t xmax = effhist->GetXaxis()->GetXmax();
2327 
2328  TTHREAD_TLS(Double_t) nevtS;
2329 
2330  // first round ? --> create histograms
2331  if (results->DoesExist("MVA_EFF_S")==0) {
2332 
2333  // for efficiency plot
2334  TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2335  TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2336  results->Store(eff_s, "MVA_EFF_S");
2337  results->Store(eff_b, "MVA_EFF_B");
2338 
2339  // sign if cut
2340  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2341 
2342  // this method is unbinned
2343  nevtS = 0;
2344  for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2345 
2346  // read the tree
2347  Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2348  Float_t theWeight = GetEvent(ievt)->GetWeight();
2349  Float_t theVal = (*mvaRes)[ievt];
2350 
2351  // select histogram depending on if sig or bgd
2352  TH1* theHist = isSignal ? eff_s : eff_b;
2353 
2354  // count signal and background events in tree
2355  if (isSignal) nevtS+=theWeight;
2356 
2357  TAxis* axis = theHist->GetXaxis();
2358  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2359  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2360  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2361  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2362  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2363 
2364  if (sign > 0)
2365  for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2366  else if (sign < 0)
2367  for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2368  else
2369  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2370  }
2371 
2372  // renormalise maximum to <=1
2373  // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2374  // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2375 
2376  eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2377  eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2378 
2379  // background efficiency versus signal efficiency
2380  TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2381  results->Store(eff_BvsS, "MVA_EFF_BvsS");
2382  eff_BvsS->SetXTitle( "Signal eff" );
2383  eff_BvsS->SetYTitle( "Backgr eff" );
2384 
2385  // background rejection (=1-eff.) versus signal efficiency
2386  TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2387  results->Store(rej_BvsS);
2388  rej_BvsS->SetXTitle( "Signal eff" );
2389  rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2390 
2391  // inverse background eff (1/eff.) versus signal efficiency
2392  TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2393  GetTestvarName(), fNbins, 0, 1 );
2394  results->Store(inveff_BvsS);
2395  inveff_BvsS->SetXTitle( "Signal eff" );
2396  inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2397 
2398  // use root finder
2399  // spline background efficiency plot
2400  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2401  if (Use_Splines_for_Eff_) {
2402  fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2403  fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2404 
2405  // verify spline sanity
2406  gTools().CheckSplines( eff_s, fSplRefS );
2407  gTools().CheckSplines( eff_b, fSplRefB );
2408  }
2409 
2410  // make the background-vs-signal efficiency plot
2411 
2412  // create root finder
2413  RootFinder rootFinder( this, fXmin, fXmax );
2414 
2415  Double_t effB = 0;
2416  fEffS = eff_s; // to be set for the root finder
2417  for (Int_t bini=1; bini<=fNbins; bini++) {
2418 
2419  // find cut value corresponding to a given signal efficiency
2420  Double_t effS = eff_BvsS->GetBinCenter( bini );
2421  Double_t cut = rootFinder.Root( effS );
2422 
2423  // retrieve background efficiency for given cut
2424  if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2425  else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2426 
2427  // and fill histograms
2428  eff_BvsS->SetBinContent( bini, effB );
2429  rej_BvsS->SetBinContent( bini, 1.0-effB );
2430  if (effB>std::numeric_limits<double>::epsilon())
2431  inveff_BvsS->SetBinContent( bini, 1.0/effB );
2432  }
2433 
2434  // create splines for histogram
2435  fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2436 
2437  // search for overlap point where, when cutting on it,
2438  // one would obtain: eff_S = rej_B = 1 - eff_B
2439  Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2440  Int_t nbins_ = 5000;
2441  for (Int_t bini=1; bini<=nbins_; bini++) {
2442 
2443  // get corresponding signal and background efficiencies
2444  effS = (bini - 0.5)/Float_t(nbins_);
2445  rejB = 1.0 - fSpleffBvsS->Eval( effS );
2446 
2447  // find signal efficiency that corresponds to required background efficiency
2448  if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2449  effS_ = effS;
2450  rejB_ = rejB;
2451  }
2452 
2453  // find cut that corresponds to signal efficiency and update signal-like criterion
2454  Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2455  SetSignalReferenceCut( cut );
2456  fEffS = 0;
2457  }
2458 
2459  // must exist...
2460  if (0 == fSpleffBvsS) {
2461  delete list;
2462  return 0.0;
2463  }
2464 
2465  // now find signal efficiency that corresponds to required background efficiency
2466  Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2467  Int_t nbins_ = 1000;
2468 
2469  if (computeArea) {
2470 
2471  // compute area of rej-vs-eff plot
2472  Double_t integral = 0;
2473  for (Int_t bini=1; bini<=nbins_; bini++) {
2474 
2475  // get corresponding signal and background efficiencies
2476  effS = (bini - 0.5)/Float_t(nbins_);
2477  effB = fSpleffBvsS->Eval( effS );
2478  integral += (1.0 - effB);
2479  }
2480  integral /= nbins_;
2481 
2482  delete list;
2483  return integral;
2484  }
2485  else {
2486 
2487  // that will be the value of the efficiency returned (it does not affect
2488  // the efficiency-vs-bkg plot, which is done anyway)
2489  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2490 
2491  // find precise efficiency value
2492  for (Int_t bini=1; bini<=nbins_; bini++) {
2493 
2494  // get corresponding signal and background efficiencies
2495  effS = (bini - 0.5)/Float_t(nbins_);
2496  effB = fSpleffBvsS->Eval( effS );
2497 
2498  // find signal efficiency that corresponds to required background efficiency
2499  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2500  effS_ = effS;
2501  effB_ = effB;
2502  }
2503 
2504  // take mean between bin above and bin below
2505  effS = 0.5*(effS + effS_);
2506 
2507  effSerr = 0;
2508  if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2509 
2510  delete list;
2511  return effS;
2512  }
2513 
2514  return -1;
2515 }
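
// Usage sketch, following the format expected above ("Efficiency:<effB>");
// an empty format string would instead return the area under the background
// rejection versus signal efficiency curve:
//
//    Double_t err  = 0;
//    Double_t effS = method->GetEfficiency( "Efficiency:0.10",
//                                           TMVA::Types::kTesting, err );
//    // effS: signal efficiency at 10% background efficiency,
//    // err: its binomial uncertainty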
2516 
2517 ////////////////////////////////////////////////////////////////////////////////
2518 
2519 Double_t TMVA::MethodBase::GetTrainingEfficiency(const TString& theString)
2520 {
2521  Data()->SetCurrentType(Types::kTraining);
2522 
2523  Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2524 
2525  // fill background efficiency (resp. rejection) versus signal efficiency plots
2526  // returns signal efficiency at background efficiency indicated in theString
2527 
2528  // parse input string for required background efficiency
2529  TList* list = gTools().ParseFormatLine( theString );
2530  // sanity check
2531 
2532  if (list->GetSize() != 2) {
2533  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2534  << " in string: " << theString
2535  << " | required format, e.g., Efficiency:0.05" << Endl;
2536  delete list;
2537  return -1;
2538  }
2539  // that will be the value of the efficiency returned (it does not affect
2540  // the efficiency-vs-bkg plot, which is done anyway)
2541  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2542 
2543  delete list;
2544 
2545  // sanity check
2546  if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2547  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2548  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2549  << Endl;
2550  return -1.0;
2551  }
2552 
2553  // create histogram
2554 
2555  // first, get efficiency histograms for signal and background
2556  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2557  Double_t xmin = effhist->GetXaxis()->GetXmin();
2558  Double_t xmax = effhist->GetXaxis()->GetXmax();
2559 
2560  // first round ? --> create and fill histograms
2561  if (results->DoesExist("MVA_TRAIN_S")==0) {
2562 
2563  // classifier response distributions for test sample
2564  Double_t sxmax = fXmax+0.00001;
2565 
2566  // MVA plots on the training sample (check for overtraining)
2567  TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2568  TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2569  results->Store(mva_s_tr, "MVA_TRAIN_S");
2570  results->Store(mva_b_tr, "MVA_TRAIN_B");
2571  mva_s_tr->Sumw2();
2572  mva_b_tr->Sumw2();
2573 
2574  // Training efficiency plots
2575  TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2576  fNbinsH, xmin, xmax );
2577  TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2578  fNbinsH, xmin, xmax );
2579  results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2580  results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2581 
2582  // sign if cut
2583  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2584 
2585  std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2586  assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2587 
2588  // this method is unbinned
2589  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2590 
2591  Data()->SetCurrentEvent(ievt);
2592  const Event* ev = GetEvent();
2593 
2594  Double_t theVal = mvaValues[ievt];
2595  Double_t theWeight = ev->GetWeight();
2596 
2597  TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2598  TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2599 
2600  theClsHist->Fill( theVal, theWeight );
2601 
2602  TAxis* axis = theEffHist->GetXaxis();
2603  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2604  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2605  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2606  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2607  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2608 
2609  if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2610  else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2611  }
2612 
2613  // normalise output distributions
2614  // uncomment those (and several others) if you want unnormalized output
2615  gTools().NormHist( mva_s_tr );
2616  gTools().NormHist( mva_b_tr );
2617 
2618  // renormalise to maximum
2619  mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2620  mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2621 
2622  // Training background efficiency versus signal efficiency
2623  TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2624  // Training background rejection (=1-eff.) versus signal efficiency
2625  TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2626  results->Store(eff_bvss, "EFF_BVSS_TR");
2627  results->Store(rej_bvss, "REJ_BVSS_TR");
2628 
2629  // use root finder
2630  // spline background efficiency plot
2631  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2632  if (Use_Splines_for_Eff_) {
2633  if (fSplTrainRefS) delete fSplTrainRefS;
2634  if (fSplTrainRefB) delete fSplTrainRefB;
2635  fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2636  fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2637 
2638  // verify spline sanity
2639  gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2640  gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2641  }
2642 
2643  // make the background-vs-signal efficiency plot
2644 
2645  // create root finder
2646  RootFinder rootFinder(this, fXmin, fXmax );
2647 
2648  Double_t effB = 0;
2649  fEffS = results->GetHist("MVA_TRAINEFF_S");
2650  for (Int_t bini=1; bini<=fNbins; bini++) {
2651 
2652  // find cut value corresponding to a given signal efficiency
2653  Double_t effS = eff_bvss->GetBinCenter( bini );
2654 
2655  Double_t cut = rootFinder.Root( effS );
2656 
2657  // retrieve background efficiency for given cut
2658  if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2659  else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2660 
2661  // and fill histograms
2662  eff_bvss->SetBinContent( bini, effB );
2663  rej_bvss->SetBinContent( bini, 1.0-effB );
2664  }
2665  fEffS = 0;
2666 
2667  // create splines for histogram
2668  fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2669  }
2670 
2671  // must exist...
2672  if (0 == fSplTrainEffBvsS) return 0.0;
2673 
2674  // now find signal efficiency that corresponds to required background efficiency
2675  Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2676  Int_t nbins_ = 1000;
2677  for (Int_t bini=1; bini<=nbins_; bini++) {
2678 
2679  // get corresponding signal and background efficiencies
2680  effS = (bini - 0.5)/Float_t(nbins_);
2681  effB = fSplTrainEffBvsS->Eval( effS );
2682 
2683  // find signal efficiency that corresponds to required background efficiency
2684  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2685  effS_ = effS;
2686  effB_ = effB;
2687  }
2688 
2689  return 0.5*(effS + effS_); // the mean between bin above and bin below
2690 }
2691 
2692 ////////////////////////////////////////////////////////////////////////////////
2693 
2694 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2695 {
2696  Data()->SetCurrentType(Types::kTesting);
2697  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2698  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2699 
2700  purity.push_back(resMulticlass->GetAchievablePur());
2701  return resMulticlass->GetAchievableEff();
2702 }
2703 
2704 ////////////////////////////////////////////////////////////////////////////////
2705 
2706 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2707 {
2708  Data()->SetCurrentType(Types::kTraining);
2709  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2710  if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2711 
2712  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2713  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2714  resMulticlass->GetBestMultiClassCuts(icls);
2715  }
2716 
2717  purity.push_back(resMulticlass->GetAchievablePur());
2718  return resMulticlass->GetAchievableEff();
2719 }
2720 
2721 ////////////////////////////////////////////////////////////////////////////////
2722 /// Construct a confusion matrix for a multiclass classifier. The confusion
2723 /// matrix compares, in turn, each class against all other classes in a pair-wise
2724 /// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2725 /// considered signal for the sake of comparison and for each column
2726 /// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2727 ///
2728 /// Note that the diagonal elements will be returned as NaN since this will
2729 /// compare a class against itself.
2730 ///
2731 /// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2732 ///
2733 /// \param[in] effB The background efficiency for which to evaluate.
2734 /// \param[in] type The data set on which to evaluate (training, testing ...).
2735 ///
2736 /// \return A matrix containing signal efficiencies for the given background
2737 /// efficiency. The diagonal elements are NaN since this measure is
2738 /// meaningless (comparing a class against itself).
2739 ///
2740 
2741 TMatrixD TMVA::MethodBase::GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
2742 {
2743  if (GetAnalysisType() != Types::kMulticlass) {
2744  Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << Endl;
2745  return TMatrixD(0, 0);
2746  }
2747 
2748  Data()->SetCurrentType(type);
2749  ResultsMulticlass *resMulticlass =
2750  dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2751 
2752  if (resMulticlass == nullptr) {
2753  Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2754  << "unable to create pointer in GetMulticlassConfusionMatrix, exiting." << Endl;
2755  return TMatrixD(0, 0);
2756  }
2757 
2758  return resMulticlass->GetConfusionMatrix(effB);
2759 }
2760 
2761 ////////////////////////////////////////////////////////////////////////////////
2762 /// compute significance of mean difference
2763 /// \f[
2764 /// \mathrm{significance} = \frac{|\langle S \rangle - \langle B \rangle|}{\sqrt{RMS_S^2 + RMS_B^2}}
2765 /// \f]
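/// For example, \f$ \langle S \rangle = 0.6 \f$, \f$ \langle B \rangle = 0.4 \f$
/// and \f$ RMS_S = RMS_B = 0.1 \f$ give a significance of
/// \f$ 0.2 / \sqrt{0.01 + 0.01} \approx 1.41 \f$.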
2766 
2767 Double_t TMVA::MethodBase::GetSignificance( void ) const
2768 {
2769  Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2770 
2771  return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2772 }
2773 
2774 ////////////////////////////////////////////////////////////////////////////////
2775 /// compute "separation" defined as
2776 /// \f[
2777 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx
2778 /// \f]
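/// A binned approximation of this integral (an illustrative sketch of what
/// gTools().GetSeparation() evaluates; hS and hB are assumed to be identically
/// binned histograms normalised to unit sum):
/// ~~~{.cpp}
/// Double_t sep = 0;
/// for (Int_t i = 1; i <= hS->GetNbinsX(); ++i) {
///    Double_t s = hS->GetBinContent(i);
///    Double_t b = hB->GetBinContent(i);
///    if (s + b > 0) sep += 0.5 * (s - b) * (s - b) / (s + b);
/// }
/// // sep is 0 for identical shapes and 1 for fully disjoint shapes
/// ~~~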
2779 
2780 Double_t TMVA::MethodBase::GetSeparation( TH1* histoS, TH1* histoB ) const
2781 {
2782  return gTools().GetSeparation( histoS, histoB );
2783 }
2784 
2785 ////////////////////////////////////////////////////////////////////////////////
2786 /// compute "separation" defined as
2787 /// \f[
2788 /// \langle s^2 \rangle = \frac{1}{2} \int_{-\infty}^{+\infty} \frac{(S(x) - B(x))^2}{S(x) + B(x)} \, dx
2789 /// \f]
2790 
2791 Double_t TMVA::MethodBase::GetSeparation( PDF* pdfS, PDF* pdfB ) const
2792 {
2793  // note, if zero pointers given, use internal pdf
2794  // sanity check first
2795  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2796  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2797  if (!pdfS) pdfS = fSplS;
2798  if (!pdfB) pdfB = fSplB;
2799 
2800  if (!fSplS || !fSplB) {
2801  Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2802  << " fSplS or fSplB are not yet filled" << Endl;
2803  return 0;
2804  } else {
2805  return gTools().GetSeparation( *pdfS, *pdfB );
2806  }
2807 }
2808 
2809 ////////////////////////////////////////////////////////////////////////////////
2810 /// calculate the area (integral) under the ROC curve as an
2811 /// overall quality measure of the classification
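/// A random classifier yields 0.5, a perfectly separating one 1.0.
/// Illustrative usage (a sketch; hS and hB are assumed to be the signal and
/// background MVA-response histograms):
/// ~~~{.cpp}
/// Double_t auc = method->GetROCIntegral(hS, hB);
/// std::cout << "ROC integral: " << auc << std::endl;
/// ~~~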
2812 
2813 Double_t TMVA::MethodBase::GetROCIntegral(TH1D *histS, TH1D *histB) const
2814 {
2815  // sanity check first; note that unlike the PDF variant below, this
2816  // overload has no internal fall-back, so null histogram pointers yield 0
2817  if ((!histS && histB) || (histS && !histB))
2818  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2819 
2820  if (histS==0 || histB==0) return 0.;
2821 
2822  TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2823  TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2824 
2825 
2826  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2827  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2828 
2829  Double_t integral = 0;
2830  UInt_t nsteps = 1000;
2831  Double_t step = (xmax-xmin)/Double_t(nsteps);
2832  Double_t cut = xmin;
2833  for (UInt_t i=0; i<nsteps; i++) {
2834  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2835  cut+=step;
2836  }
2837  delete pdfS;
2838  delete pdfB;
2839  return integral*step;
2840 }
2841 
2842 
2843 ////////////////////////////////////////////////////////////////////////////////
2844 /// calculate the area (integral) under the ROC curve as an
2845 /// overall quality measure of the classification
2846 
2847 Double_t TMVA::MethodBase::GetROCIntegral(PDF *pdfS, PDF *pdfB) const
2848 {
2849  // note, if zero pointers given, use internal pdf
2850  // sanity check first
2851  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2852  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(PDF*, PDF*)> Mismatch in pdfs" << Endl;
2853  if (!pdfS) pdfS = fSplS;
2854  if (!pdfB) pdfB = fSplB;
2855 
2856  if (pdfS==0 || pdfB==0) return 0.;
2857 
2858  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2859  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2860 
2861  Double_t integral = 0;
2862  UInt_t nsteps = 1000;
2863  Double_t step = (xmax-xmin)/Double_t(nsteps);
2864  Double_t cut = xmin;
2865  for (UInt_t i=0; i<nsteps; i++) {
2866  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2867  cut+=step;
2868  }
2869  return integral*step;
2870 }
2871 
2872 ////////////////////////////////////////////////////////////////////////////////
2873 /// plot the significance, \f$ \frac{S}{\sqrt{S + B}} \f$, as a function of the cut
2874 /// for the given numbers of signal and background events; returns the cut that
2875 /// maximises the significance, and the maximum significance itself via reference
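/// Illustrative usage (a sketch, assuming expected yields of 1000 signal and
/// 10000 background events):
/// ~~~{.cpp}
/// Double_t maxSig = 0;
/// Double_t bestCut = method->GetMaximumSignificance(1000, 10000, maxSig);
/// std::cout << "optimal cut: " << bestCut
///           << ", significance: " << maxSig << std::endl;
/// ~~~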
2876 
2877 Double_t TMVA::MethodBase::GetMaximumSignificance( Double_t SignalEvents,
2878  Double_t BackgroundEvents,
2879  Double_t& max_significance_value ) const
2880 {
2881  Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2882 
2883  Double_t max_significance(0);
2884  Double_t effS(0),effB(0),significance(0);
2885  TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2886 
2887  if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2888  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2889  << "Number of signal or background events is <= 0 ==> abort"
2890  << Endl;
2891  }
2892 
2893  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2894  << SignalEvents/BackgroundEvents << Endl;
2895 
2896  TH1* eff_s = results->GetHist("MVA_EFF_S");
2897  TH1* eff_b = results->GetHist("MVA_EFF_B");
2898 
2899  if ( (eff_s==0) || (eff_b==0) ) {
2900  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2901  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2902  return 0;
2903  }
2904 
2905  for (Int_t bin=1; bin<=fNbinsH; bin++) {
2906  effS = eff_s->GetBinContent( bin );
2907  effB = eff_b->GetBinContent( bin );
2908 
2909  // put significance into a histogram
2910  significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2911 
2912  temp_histogram->SetBinContent(bin,significance);
2913  }
2914 
2915  // find maximum in histogram
2916  max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2917  max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2918 
2919  // clean up the temporary histogram
2920  delete temp_histogram;
2921 
2922  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2923  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2924 
2925  return max_significance;
2926 }
2927 
2928 ////////////////////////////////////////////////////////////////////////////////
2929 /// calculates the rms, mean, xmin and xmax of the requested event variable
2930 /// for signal and background on the given tree type, taking event weights
2931 /// into account
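/// The weighted estimators evaluated below are
/// \f[
/// \bar{x} = \frac{\sum_i w_i x_i}{\sum_i w_i}, \qquad
/// RMS = \sqrt{\frac{\sum_i w_i x_i^2}{\sum_i w_i} - \bar{x}^2}
/// \f]
/// computed separately for signal and background events.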
2932 
2933 void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
2934  Double_t& meanS, Double_t& meanB,
2935  Double_t& rmsS, Double_t& rmsB,
2936  Double_t& xmin, Double_t& xmax )
2937 {
2938  Types::ETreeType previousTreeType = Data()->GetCurrentType();
2939  Data()->SetCurrentType(treeType);
2940 
2941  Long64_t entries = Data()->GetNEvents();
2942 
2943  // sanity check
2944  if (entries <=0)
2945  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<Statistics> Wrong tree type: " << treeType << Endl;
2946 
2947  // index of the wanted variable
2948  UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2949 
2950  // initialise the variable range before the event loop
2951  xmin = +DBL_MAX;
2952  xmax = -DBL_MAX;
2955 
2956  // take into account event weights
2957  meanS = 0;
2958  meanB = 0;
2959  rmsS = 0;
2960  rmsB = 0;
2961  Double_t sumwS = 0, sumwB = 0;
2962 
2963  // loop over all training events
2964  for (Long64_t ievt = 0; ievt < entries; ievt++) {
2965 
2966  const Event* ev = GetEvent(ievt);
2967 
2968  Double_t theVar = ev->GetValue(varIndex);
2969  Double_t weight = ev->GetWeight();
2970 
2971  if (DataInfo().IsSignal(ev)) {
2972  sumwS += weight;
2973  meanS += weight*theVar;
2974  rmsS += weight*theVar*theVar;
2975  }
2976  else {
2977  sumwB += weight;
2978  meanB += weight*theVar;
2979  rmsB += weight*theVar*theVar;
2980  }
2981  xmin = TMath::Min( xmin, theVar );
2982  xmax = TMath::Max( xmax, theVar );
2983  }
2986 
2987  meanS = meanS/sumwS;
2988  meanB = meanB/sumwB;
2989  rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2990  rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2991 
2992  Data()->SetCurrentType(previousTreeType);
2993 }
2994 
2995 ////////////////////////////////////////////////////////////////////////////////
2996 /// create a standalone C++ reader class for the trained method (classification and multiclass)
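/// The generated file can be compiled standalone, without linking against TMVA.
/// Hypothetical usage of the emitted class (the actual file and class names
/// depend on the job and method names; e.g. method "BDT" yields class ReadBDT):
/// ~~~{.cpp}
/// #include "dataset/weights/TMVAClassification_BDT.class.C"
///
/// std::vector<std::string> vars = {"var1", "var2"}; // training variable names
/// ReadBDT reader(vars);
/// std::vector<double> event = {0.3, -1.2};          // one event's input values
/// double mva = reader.GetMvaValue(event);
/// ~~~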
2997 
2998 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2999 {
3000  // construct the default class file name if none was given
3001  TString classFileName = "";
3002  if (theClassFileName == "")
3003  classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3004  else
3005  classFileName = theClassFileName;
3006 
3007  TString className = TString("Read") + GetMethodName();
3008 
3010  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3011  << "Creating standalone class: "
3012  << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3013 
3014  std::ofstream fout( classFileName );
3015  if (!fout.good()) { // file could not be opened --> Error
3016  Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3017  }
3018 
3019  // now create the class
3020  // preamble
3021  fout << "// Class: " << className << std::endl;
3022  fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3023 
3024  // print general information and configuration state
3025  fout << std::endl;
3026  fout << "/* configuration options =====================================================" << std::endl << std::endl;
3027  WriteStateToStream( fout );
3028  fout << std::endl;
3029  fout << "============================================================================ */" << std::endl;
3030 
3031  // generate the class
3032  fout << "" << std::endl;
3033  fout << "#include <array>" << std::endl;
3034  fout << "#include <vector>" << std::endl;
3035  fout << "#include <cmath>" << std::endl;
3036  fout << "#include <string>" << std::endl;
3037  fout << "#include <iostream>" << std::endl;
3038  fout << "" << std::endl;
3039  // if the classifier needs additional classes for its response implementation,
3040  // that code goes here (the header declarations must come before the main class)
3041  this->MakeClassSpecificHeader( fout, className );
3042 
3043  fout << "#ifndef IClassifierReader__def" << std::endl;
3044  fout << "#define IClassifierReader__def" << std::endl;
3045  fout << std::endl;
3046  fout << "class IClassifierReader {" << std::endl;
3047  fout << std::endl;
3048  fout << " public:" << std::endl;
3049  fout << std::endl;
3050  fout << " // constructor" << std::endl;
3051  fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3052  fout << " virtual ~IClassifierReader() {}" << std::endl;
3053  fout << std::endl;
3054  fout << " // return classifier response" << std::endl;
3055  if(GetAnalysisType() == Types::kMulticlass) {
3056  fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3057  } else {
3058  fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3059  }
3060  fout << std::endl;
3061  fout << " // returns classifier status" << std::endl;
3062  fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3063  fout << std::endl;
3064  fout << " protected:" << std::endl;
3065  fout << std::endl;
3066  fout << " bool fStatusIsClean;" << std::endl;
3067  fout << "};" << std::endl;
3068  fout << std::endl;
3069  fout << "#endif" << std::endl;
3070  fout << std::endl;
3071  fout << "class " << className << " : public IClassifierReader {" << std::endl;
3072  fout << std::endl;
3073  fout << " public:" << std::endl;
3074  fout << std::endl;
3075  fout << " // constructor" << std::endl;
3076  fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3077  fout << " : IClassifierReader()," << std::endl;
3078  fout << " fClassName( \"" << className << "\" )," << std::endl;
3079  fout << " fNvars( " << GetNvar() << " )" << std::endl;
3080  fout << " {" << std::endl;
3081  fout << " // the training input variables" << std::endl;
3082  fout << " const char* inputVars[] = { ";
3083  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3084  fout << "\"" << GetOriginalVarName(ivar) << "\"";
3085  if (ivar<GetNvar()-1) fout << ", ";
3086  }
3087  fout << " };" << std::endl;
3088  fout << std::endl;
3089  fout << " // sanity checks" << std::endl;
3090  fout << " if (theInputVars.size() <= 0) {" << std::endl;
3091  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3092  fout << " fStatusIsClean = false;" << std::endl;
3093  fout << " }" << std::endl;
3094  fout << std::endl;
3095  fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3096  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3097  fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3098  fout << " fStatusIsClean = false;" << std::endl;
3099  fout << " }" << std::endl;
3100  fout << std::endl;
3101  fout << " // validate input variables" << std::endl;
3102  fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3103  fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3104  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3105  fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3106  fout << " fStatusIsClean = false;" << std::endl;
3107  fout << " }" << std::endl;
3108  fout << " }" << std::endl;
3109  fout << std::endl;
3110  fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3111  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3112  fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3113  fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3114  }
3115  fout << std::endl;
3116  fout << " // initialize input variable types" << std::endl;
3117  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3118  fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3119  }
3120  fout << std::endl;
3121  fout << " // initialize constants" << std::endl;
3122  fout << " Initialize();" << std::endl;
3123  fout << std::endl;
3124  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3125  fout << " // initialize transformation" << std::endl;
3126  fout << " InitTransform();" << std::endl;
3127  }
3128  fout << " }" << std::endl;
3129  fout << std::endl;
3130  fout << " // destructor" << std::endl;
3131  fout << " virtual ~" << className << "() {" << std::endl;
3132  fout << " Clear(); // method-specific" << std::endl;
3133  fout << " }" << std::endl;
3134  fout << std::endl;
3135  fout << " // the classifier response" << std::endl;
3136  fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3137  fout << " // variables given to the constructor" << std::endl;
3138  if(GetAnalysisType() == Types::kMulticlass) {
3139  fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3140  } else {
3141  fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3142  }
3143  fout << std::endl;
3144  fout << " private:" << std::endl;
3145  fout << std::endl;
3146  fout << " // method-specific destructor" << std::endl;
3147  fout << " void Clear();" << std::endl;
3148  fout << std::endl;
3149  if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3150  fout << " // input variable transformation" << std::endl;
3151  GetTransformationHandler().MakeFunction(fout, className,1);
3152  fout << " void InitTransform();" << std::endl;
3153  fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3154  fout << std::endl;
3155  }
3156  fout << " // common member variables" << std::endl;
3157  fout << " const char* fClassName;" << std::endl;
3158  fout << std::endl;
3159  fout << " const size_t fNvars;" << std::endl;
3160  fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3161  fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3162  fout << std::endl;
3163  fout << " // normalisation of input variables" << std::endl;
3164  fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3165  fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3166  fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3167  fout << " // normalise to output range: [-1, 1]" << std::endl;
3168  fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3169  fout << " }" << std::endl;
3170  fout << std::endl;
3171  fout << " // type of input variable: 'F' or 'I'" << std::endl;
3172  fout << " char fType[" << GetNvar() << "];" << std::endl;
3173  fout << std::endl;
3174  fout << " // initialize internal variables" << std::endl;
3175  fout << " void Initialize();" << std::endl;
3176  if(GetAnalysisType() == Types::kMulticlass) {
3177  fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3178  } else {
3179  fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3180  }
3181  fout << "" << std::endl;
3182  fout << " // private members (method specific)" << std::endl;
3183 
3184  // call the classifier specific output (the classifier must close the class !)
3185  MakeClassSpecific( fout, className );
3186 
3187  if(GetAnalysisType() == Types::kMulticlass) {
3188  fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3189  } else {
3190  fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3191  }
3192  fout << "{" << std::endl;
3193  fout << " // classifier response value" << std::endl;
3194  if(GetAnalysisType() == Types::kMulticlass) {
3195  fout << " std::vector<double> retval;" << std::endl;
3196  } else {
3197  fout << " double retval = 0;" << std::endl;
3198  }
3199  fout << std::endl;
3200  fout << " // classifier response, sanity check first" << std::endl;
3201  fout << " if (!IsStatusClean()) {" << std::endl;
3202  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3203  fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3204  fout << " }" << std::endl;
3205  fout << " else {" << std::endl;
3206  if (IsNormalised()) {
3207  fout << " // normalise variables" << std::endl;
3208  fout << " std::vector<double> iV;" << std::endl;
3209  fout << " iV.reserve(inputValues.size());" << std::endl;
3210  fout << " int ivar = 0;" << std::endl;
3211  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3212  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3213  fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3214  fout << " }" << std::endl;
3215  if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3216  GetMethodType() != Types::kHMatrix) {
3217  fout << " Transform( iV, -1 );" << std::endl;
3218  }
3219 
3220  if(GetAnalysisType() == Types::kMulticlass) {
3221  fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3222  } else {
3223  fout << " retval = GetMvaValue__( iV );" << std::endl;
3224  }
3225  } else {
3226  if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3227  GetMethodType() != Types::kHMatrix) {
3228  fout << " std::vector<double> iV(inputValues);" << std::endl;
3229  fout << " Transform( iV, -1 );" << std::endl;
3230  if(GetAnalysisType() == Types::kMulticlass) {
3231  fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3232  } else {
3233  fout << " retval = GetMvaValue__( iV );" << std::endl;
3234  }
3235  } else {
3236  if(GetAnalysisType() == Types::kMulticlass) {
3237  fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
3238  } else {
3239  fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3240  }
3241  }
3242  }
3243  fout << " }" << std::endl;
3244  fout << std::endl;
3245  fout << " return retval;" << std::endl;
3246  fout << "}" << std::endl;
3247 
3248  // create output for transformation - if any
3249  if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3250  GetTransformationHandler().MakeFunction(fout, className,2);
3251 
3252  // close the file
3253  fout.close();
3254 }
3255 
3256 ////////////////////////////////////////////////////////////////////////////////
3257 /// prints out method-specific help method
3258 
3259 void TMVA::MethodBase::PrintHelpMessage() const
3260 {
3261  // if options are written to reference file, also append help info
3262  std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3263  std::ofstream* o = 0;
3264  if (gConfig().WriteOptionsReference()) {
3265  Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3266  o = new std::ofstream( GetReferenceFile(), std::ios::app );
3267  if (!o->good()) { // file could not be opened --> Error
3268  Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3269  }
3270  std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3271  }
3272 
3273  // "|--------------------------------------------------------------|"
3274  if (!o) {
3275  Log() << kINFO << Endl;
3276  Log() << gTools().Color("bold")
3277  << "================================================================"
3278  << gTools().Color( "reset" )
3279  << Endl;
3280  Log() << gTools().Color("bold")
3281  << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3282  << gTools().Color( "reset" )
3283  << Endl;
3284  }
3285  else {
3286  Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3287  }
3288 
3289  // print method-specific help message
3290  GetHelpMessage();
3291 
3292  if (!o) {
3293  Log() << Endl;
3294  Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3295  Log() << gTools().Color("bold")
3296  << "================================================================"
3297  << gTools().Color( "reset" )
3298  << Endl;
3299  Log() << Endl;
3300  }
3301  else {
3302  // indicate END
3303  Log() << "# End of Message___" << Endl;
3304  }
3305 
3306  std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3307  if (o) o->close();
3308 }
3309 
3310 // ----------------------- r o o t f i n d i n g ----------------------------
3311 
3312 ////////////////////////////////////////////////////////////////////////////////
3313 /// returns efficiency as function of cut
3314 
3315 Double_t TMVA::MethodBase::GetValueForRoot( Double_t theCut )
3316 {
3317  Double_t retval=0;
3318 
3319  // retrieve the class object
3320  if (Use_Splines_for_Eff_) {
3321  retval = fSplRefS->Eval( theCut );
3322  }
3323  else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3324 
3325  // caution: here we take some "forbidden" action to hide a problem:
3326  // in some cases, in particular for likelihood, the binned efficiency distributions
3327  // do not equal 1 at xmin and 0 at xmax; of course, in principle we have the
3328  // unbinned information available in the trees, but the unbinned minimization is
3329  // too slow, and we don't need to do a precision measurement here. Hence, we force
3330  // this property.
3331  Double_t eps = 1.0e-5;
3332  if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3333  else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3334 
3335  return retval;
3336 }
3337 
3338 ////////////////////////////////////////////////////////////////////////////////
3339 /// returns the event collection (i.e. the dataset) TRANSFORMED using the
3340 /// classifier's specific variable transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3341 
3342 const std::vector<TMVA::Event*>& TMVA::MethodBase::GetEventCollection( Types::ETreeType type)
3343 {
3344  // if there's no variable transformation for this classifier, just hand back the
3345  // event collection of the data set
3346  if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3347  return (Data()->GetEventCollection(type));
3348  }
3349 
3350  // otherwise, transform ALL the events and hand back the vector of pointers to the
3351  // transformed events; if the cached pointer is already non-null, the transformation
3352  // has been done before and the cached collection is simply returned
3353  Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3354  if (fEventCollections.at(idx) == 0) {
3355  fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3356  fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3357  }
3358  return *(fEventCollections.at(idx));
3359 }
3360 
3361 ////////////////////////////////////////////////////////////////////////////////
3362 /// calculates the TMVA version string from the training version code on the fly
3363 
3364 TString TMVA::MethodBase::GetTrainingTMVAVersionString() const
3365 {
3366  UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3367  UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3368  UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3369 
3370  return TString(Form("%u.%u.%u",a,b,c));
3371 }
3372 
3373 ////////////////////////////////////////////////////////////////////////////////
3374 /// calculates the ROOT version string from the training version code on the fly
3375 
3376 TString TMVA::MethodBase::GetTrainingROOTVersionString() const
3377 {
3378  UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3379  UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3380  UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3381 
3382  return TString(Form("%u.%02u/%02u",a,b,c));
3383 }
3384 
3385 ////////////////////////////////////////////////////////////////////////////////
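/// Return the Kolmogorov-Smirnov probability between the training and test MVA
/// distributions for signal (SorB = 'S') or background ('B'); returns -1 if the
/// required histograms are unavailable. Illustrative overtraining check (a
/// sketch; the 0.01 threshold is an arbitrary choice, not a TMVA default):
/// ~~~{.cpp}
/// Double_t ksS = method->GetKSTrainingVsTest('S');
/// Double_t ksB = method->GetKSTrainingVsTest('B');
/// if (ksS >= 0 && ksB >= 0 && (ksS < 0.01 || ksB < 0.01))
///    std::cout << "warning: training and test shapes differ significantly" << std::endl;
/// ~~~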
3386 
3387 Double_t TMVA::MethodBase::GetKSTrainingVsTest(Char_t SorB, TString opt) {
3388  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3389  ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3390 
3391  if (mvaRes != NULL) {
3392  TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3393  TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3394  TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3395  TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3396 
3397  if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3398 
3399  if (SorB == 's' || SorB == 'S')
3400  return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3401  else
3402  return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3403  }
3404  return -1;
3405 }