Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TransformationHandler.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TransformationHandler *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * *
21  * Copyright (c) 2005-2011: *
22  * CERN, Switzerland *
23  * MPI-K Heidelberg, Germany *
24  * U. of Bonn, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
31 /*! \class TMVA::TransformationHandler
32 \ingroup TMVA
33 Class that contains all the data information.
34 */
35 
37 
38 #include "TMVA/Config.h"
39 #include "TMVA/DataSet.h"
40 #include "TMVA/DataSetInfo.h"
41 #include "TMVA/Event.h"
42 #include "TMVA/MsgLogger.h"
43 #include "TMVA/Ranking.h"
44 #include "TMVA/Tools.h"
45 #include "TMVA/Types.h"
48 #include "TMVA/VariableInfo.h"
54 
55 #include "TAxis.h"
56 #include "TDirectory.h"
57 #include "TH1.h"
58 #include "TH2.h"
59 #include "TList.h"
60 #include "TMath.h"
61 #include "TProfile.h"
62 
63 #include <vector>
64 #include <iomanip>
65 
66 ////////////////////////////////////////////////////////////////////////////////
67 /// constructor
68 
69 TMVA::TransformationHandler::TransformationHandler( DataSetInfo& dsi, const TString& callerName )
70  : fDataSetInfo(dsi),
71  fRootBaseDir(0),
72  fCallerName (callerName),
73  fLogger ( new MsgLogger(TString("TFHandler_" + callerName).Data(), kINFO) )
74 {
75  // produce one entry for each class and one entry for all classes. If there is only one class,
76  // produce only one entry
77  fNumC = (dsi.GetNClasses()<= 1) ? 1 : dsi.GetNClasses()+1;
78 
79  fVariableStats.resize( fNumC );
80  for (Int_t i=0; i<fNumC; i++ ) fVariableStats.at(i).resize(dsi.GetNVariables() + dsi.GetNTargets());
81 }
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// destructor
85 
86 TMVA::TransformationHandler::~TransformationHandler()
87 {
88  std::vector<Ranking*>::const_iterator it = fRanking.begin();
89  for (; it != fRanking.end(); ++it) delete *it;
90 
91  fTransformations.SetOwner();
92  delete fLogger;
93 }
94 
95 ////////////////////////////////////////////////////////////////////////////////
96 
97 void TMVA::TransformationHandler::SetCallerName( const TString& name )
98 {
99  fCallerName = name;
100  fLogger->SetSource( TString("TFHandler_" + fCallerName).Data() );
101 }
102 
103 ////////////////////////////////////////////////////////////////////////////////
104 
105 TMVA::VariableTransformBase* TMVA::TransformationHandler::AddTransformation( VariableTransformBase *trf, Int_t cls )
106 {
107  TString tfname = trf->Log().GetName();
108  trf->Log().SetSource(TString(fCallerName+"_"+tfname+"_TF").Data());
109  fTransformations.Add(trf);
110  fTransformationsReferenceClasses.push_back( cls );
111  return trf;
112 }
113 
114 ////////////////////////////////////////////////////////////////////////////////
115 /// Caches calculated summary statistics of transformed variables.
116 ///
117 /// \param[in] k index of class
118 /// \param[in] ivar index of variable
119 /// \param[in] mean the mean value of the variable
120 /// \param[in] rms the root-mean-square value of the variable
121 /// \param[in] min the minimum value of the variable
122 /// \param[in] max the maximum value of the variable
123 
124 void TMVA::TransformationHandler::AddStats( Int_t k, UInt_t ivar, Double_t mean, Double_t rms, Double_t min, Double_t max )
125 {
126  if (rms <= 0 || TMath::IsNaN(rms)) {
127  Log() << kWARNING << "Variable \"" << Variable(ivar).GetExpression()
128  << "\" has zero, negative, or NaN RMS^2: "
129  << rms
130  << " ==> set to zero. Please check the variable content" << Endl;
131  rms = 0;
132  }
133 
134  VariableStat stat; stat.fMean = mean; stat.fRMS = rms; stat.fMin = min; stat.fMax = max;
135  fVariableStats.at(k).at(ivar) = stat;
136 }
137 
138 ////////////////////////////////////////////////////////////////////////////////
139 /// overrides the setting for all classes! (this is put in basically for the likelihood-method)
140 /// be careful with the usage this method
141 
142 void TMVA::TransformationHandler::SetTransformationReferenceClass( Int_t cls )
143 {
144  for (UInt_t i = 0; i < fTransformationsReferenceClasses.size(); i++) {
145  fTransformationsReferenceClasses.at( i ) = cls;
146  }
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// the transformation
151 
152 const TMVA::Event* TMVA::TransformationHandler::Transform( const Event* ev ) const
153 {
154  TListIter trIt(&fTransformations);
155  std::vector<Int_t>::const_iterator rClsIt = fTransformationsReferenceClasses.begin();
156  const Event* trEv = ev;
157  while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) {
158  if (rClsIt == fTransformationsReferenceClasses.end()) Log() << kFATAL<< "invalid read in TransformationHandler::Transform " <<Endl;
159  trEv = trf->Transform(trEv, (*rClsIt) );
160  ++rClsIt;
161  }
162  return trEv;
163 }
164 
165 ////////////////////////////////////////////////////////////////////////////////
166 
167 const TMVA::Event* TMVA::TransformationHandler::InverseTransform( const Event* ev, Bool_t suppressIfNoTargets ) const
168 {
169  if (fTransformationsReferenceClasses.empty()){
170  //Log() << kWARNING << __FILE__ <<":InverseTransform fTransformationsReferenceClasses is empty" << Endl;
171  return ev;
172  }
173  // the inverse transformation
174  TListIter trIt(&fTransformations, kIterBackward);
175  std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.end();
176  --rClsIt;
177  const Event* trEv = ev;
178  UInt_t nvars = 0, ntgts = 0, nspcts = 0;
179  while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) { // shouldn't be the transformation called in the inverse order for the inversetransformation?????
180  if (trf->IsCreated()) {
181  trf->CountVariableTypes( nvars, ntgts, nspcts );
182  if( !(suppressIfNoTargets && ntgts==0) )
183  trEv = trf->InverseTransform(ev, (*rClsIt) );
184  }
185  else break;
186  --rClsIt;
187  }
188  return trEv;
189 
190 
191  // TListIter trIt(&fTransformations);
192  // std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.begin();
193  // const Event* trEv = ev;
194  // UInt_t nvars = 0, ntgts = 0, nspcts = 0;
195  // while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) { // shouldn't be the transformation called in the inverse order for the inversetransformation?????
196  // if (trf->IsCreated()) {
197  // trf->CountVariableTypes( nvars, ntgts, nspcts );
198  // if( !(suppressIfNoTargets && ntgts==0) )
199  // trEv = trf->InverseTransform(ev, (*rClsIt) );
200  // }
201  // else break;
202  // rClsIt++;
203  // }
204  // return trEv;
205 
206 }
207 
208 ////////////////////////////////////////////////////////////////////////////////
209 /// computation of transformation
210 
211 const std::vector<TMVA::Event*>* TMVA::TransformationHandler::CalcTransformations( const std::vector<Event*>& events,
212  Bool_t createNewVector )
213 {
214  if (fTransformations.GetEntries() <= 0)
215  return &events;
216 
217  // the transformedEvents are initialised with the initial events and then
218  // subsequently replaced with transformed ones. The n-th transformation will
219  // and on the events as they look like after the (n-1)-the transformation
220  // as intended for the chained transformations
221  std::vector<Event*> *transformedEvents = new std::vector<TMVA::Event*>(events.size());
222  for ( UInt_t ievt = 0; ievt<events.size(); ievt++)
223  transformedEvents->at(ievt) = new Event(*events.at(ievt));
224 
225  TListIter trIt(&fTransformations);
226  std::vector< Int_t >::iterator rClsIt = fTransformationsReferenceClasses.begin();
227  while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) {
228  if (trf->PrepareTransformation(*transformedEvents)) {
229  for (UInt_t ievt = 0; ievt<transformedEvents->size(); ievt++) { // loop through all events
230  *(*transformedEvents)[ievt] = *trf->Transform((*transformedEvents)[ievt],(*rClsIt));
231  }
232  ++rClsIt;
233  }
234  }
235 
236  CalcStats(*transformedEvents);
237 
238  // plot the variables once in this transformation
239  PlotVariables(*transformedEvents);
240 
241  //sometimes, the actual transformed event vector is not used for anything but the previous
242  //CalcStat and PlotVariables calles, in that case, we delete it again (and return NULL)
243  if (!createNewVector) { // if we don't want that newly created event vector to persist, then delete it
244  for ( UInt_t ievt = 0; ievt<transformedEvents->size(); ievt++)
245  delete (*transformedEvents)[ievt];
246  delete transformedEvents;
247  transformedEvents=NULL;
248  }
249 
250  return transformedEvents; // give back the newly created event collection (containing the transformed events)
251 }
252 
253 ////////////////////////////////////////////////////////////////////////////////
254 /// method to calculate minimum, maximum, mean, and RMS for all
255 /// variables used in the MVA
256 
257 void TMVA::TransformationHandler::CalcStats (const std::vector<Event*>& events )
258 {
259  UInt_t nevts = events.size();
260 
261  if (nevts==0)
262  Log() << kFATAL << "No events available to find min, max, mean and rms" << Endl;
263 
264  // if transformation has not been succeeded, the tree may be empty
265  const UInt_t nvar = events[0]->GetNVariables();
266  const UInt_t ntgt = events[0]->GetNTargets();
267 
268  Double_t *sumOfWeights = new Double_t[fNumC];
269  Double_t* *x2 = new Double_t*[fNumC];
270  Double_t* *x0 = new Double_t*[fNumC];
271  Double_t* *varMin = new Double_t*[fNumC];
272  Double_t* *varMax = new Double_t*[fNumC];
273 
274  for (Int_t cls=0; cls<fNumC; cls++) {
275  sumOfWeights[cls]=0;
276  x2[cls] = new Double_t[nvar+ntgt];
277  x0[cls] = new Double_t[nvar+ntgt];
278  varMin[cls] = new Double_t[nvar+ntgt];
279  varMax[cls] = new Double_t[nvar+ntgt];
280  for (UInt_t ivar=0; ivar<nvar+ntgt; ivar++) {
281  x0[cls][ivar] = x2[cls][ivar] = 0;
282  varMin[cls][ivar] = DBL_MAX;
283  varMax[cls][ivar] = -DBL_MAX;
284  }
285  }
286 
287  for (UInt_t ievt=0; ievt<nevts; ievt++) {
288  const Event* ev = events[ievt];
289  Int_t cls = ev->GetClass();
290 
291  Double_t weight = ev->GetWeight();
292  sumOfWeights[cls] += weight;
293  if (fNumC > 1 ) sumOfWeights[fNumC-1] += weight; // if more than one class, store values for all classes
294  for (UInt_t var_tgt = 0; var_tgt < 2; var_tgt++ ){ // first for variables, then for targets
295  UInt_t nloop = ( var_tgt==0?nvar:ntgt );
296  for (UInt_t ivar=0; ivar<nloop; ivar++) {
297  Double_t x = ( var_tgt==0?ev->GetValue(ivar):ev->GetTarget(ivar) );
298 
299  if (x < varMin[cls][(var_tgt*nvar)+ivar]) varMin[cls][(var_tgt*nvar)+ivar]= x;
300  if (x > varMax[cls][(var_tgt*nvar)+ivar]) varMax[cls][(var_tgt*nvar)+ivar]= x;
301 
302  x0[cls][(var_tgt*nvar)+ivar] += x*weight;
303  x2[cls][(var_tgt*nvar)+ivar] += x*x*weight;
304 
305  if (fNumC > 1) {
306  if (x < varMin[fNumC-1][(var_tgt*nvar)+ivar]) varMin[fNumC-1][(var_tgt*nvar)+ivar]= x;
307  if (x > varMax[fNumC-1][(var_tgt*nvar)+ivar]) varMax[fNumC-1][(var_tgt*nvar)+ivar]= x;
308 
309  x0[fNumC-1][(var_tgt*nvar)+ivar] += x*weight;
310  x2[fNumC-1][(var_tgt*nvar)+ivar] += x*x*weight;
311  }
312  }
313  }
314  }
315 
316 
317  // set Mean and RMS
318  for (UInt_t var_tgt = 0; var_tgt < 2; var_tgt++ ){ // first for variables, then for targets
319  UInt_t nloop = ( var_tgt==0?nvar:ntgt );
320  for (UInt_t ivar=0; ivar<nloop; ivar++) {
321  for (Int_t cls = 0; cls < fNumC; cls++) {
322  Double_t mean = x0[cls][(var_tgt*nvar)+ivar]/sumOfWeights[cls];
323  Double_t rms = TMath::Sqrt( x2[cls][(var_tgt*nvar)+ivar]/sumOfWeights[cls] - mean*mean);
324  AddStats(cls, (var_tgt*nvar)+ivar, mean, rms, varMin[cls][(var_tgt*nvar)+ivar], varMax[cls][(var_tgt*nvar)+ivar]);
325  }
326  }
327  }
328 
329  // ------ pretty output of basic statistics -------------------------------
330  // find maximum length in V (and column title)
331  UInt_t maxL = 8, maxV = 0;
332  std::vector<UInt_t> vLengths;
333  for (UInt_t ivar=0; ivar<nvar+ntgt; ivar++) {
334  if( ivar < nvar )
335  maxL = TMath::Max( (UInt_t)Variable(ivar).GetLabel().Length(), maxL );
336  else
337  maxL = TMath::Max( (UInt_t)Target(ivar-nvar).GetLabel().Length(), maxL );
338  }
339  maxV = maxL + 2;
340  // full column length
341  UInt_t clen = maxL + 4*maxV + 11;
342  Log() << kHEADER ;
343  //for (UInt_t i=0; i<clen; i++) //Log() << "-";
344 
345  //Log() << Endl;
346  // full column length
347  Log() << std::setw(maxL) << "Variable";
348  Log() << " " << std::setw(maxV) << "Mean";
349  Log() << " " << std::setw(maxV) << "RMS";
350  Log() << " " << std::setw(maxV) << "[ Min ";
351  Log() << " " << std::setw(maxV) << " Max ]"<< Endl;;
352  for (UInt_t i=0; i<clen; i++) Log() << "-";
353  Log() << Endl;
354 
355  // the numbers
356  TString format = "%#11.5g";
357  for (UInt_t ivar=0; ivar<nvar+ntgt; ivar++) {
358  if( ivar < nvar )
359  Log() << std::setw(maxL) << Variable(ivar).GetLabel() << ":";
360  else
361  Log() << std::setw(maxL) << Target(ivar-nvar).GetLabel() << ":";
362  Log() << std::setw(maxV) << Form( format.Data(), GetMean(ivar) );
363  Log() << std::setw(maxV) << Form( format.Data(), GetRMS(ivar) );
364  Log() << " [" << std::setw(maxV) << Form( format.Data(), GetMin(ivar) );
365  Log() << std::setw(maxV) << Form( format.Data(), GetMax(ivar) ) << " ]";
366  Log() << Endl;
367  }
368  for (UInt_t i=0; i<clen; i++) Log() << "-";
369  Log() << Endl;
370  // ------------------------------------------------------------------------
371 
372  delete[] sumOfWeights;
373  for (Int_t cls=0; cls<fNumC; cls++) {
374  delete [] x2[cls];
375  delete [] x0[cls];
376  delete [] varMin[cls];
377  delete [] varMax[cls];
378  }
379  delete [] x2;
380  delete [] x0;
381  delete [] varMin;
382  delete [] varMax;
383 }
384 
385 ////////////////////////////////////////////////////////////////////////////////
386 /// create transformation function
387 
388 void TMVA::TransformationHandler::MakeFunction( std::ostream& fout, const TString& fncName, Int_t part ) const
389 {
390  TListIter trIt(&fTransformations);
391  std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.begin();
392  UInt_t trCounter=1;
393  while (VariableTransformBase *trf = (VariableTransformBase*) trIt() ) {
394  trf->MakeFunction(fout, fncName, part, trCounter++, (*rClsIt) );
395  ++rClsIt;
396  }
397  if (part==1) {
398  for (Int_t i=0; i<fTransformations.GetSize(); i++) {
399  fout << " void InitTransform_"<<i+1<<"();" << std::endl;
400  fout << " void Transform_"<<i+1<<"( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
401  }
402  }
403  if (part==2) {
404  fout << std::endl;
405  fout << "//_______________________________________________________________________" << std::endl;
406  fout << "inline void " << fncName << "::InitTransform()" << std::endl;
407  fout << "{" << std::endl;
408  for (Int_t i=0; i<fTransformations.GetSize(); i++)
409  fout << " InitTransform_"<<i+1<<"();" << std::endl;
410  fout << "}" << std::endl;
411  fout << std::endl;
412  fout << "//_______________________________________________________________________" << std::endl;
413  fout << "inline void " << fncName << "::Transform( std::vector<double>& iv, int sigOrBgd ) const" << std::endl;
414  fout << "{" << std::endl;
415  for (Int_t i=0; i<fTransformations.GetSize(); i++)
416  fout << " Transform_"<<i+1<<"( iv, sigOrBgd );" << std::endl;
417 
418  fout << "}" << std::endl;
419  }
420 }
421 
422 ////////////////////////////////////////////////////////////////////////////////
423 /// return transformation name
424 
425 TString TMVA::TransformationHandler::GetName() const
426 {
427  TString name("Id");
428  TListIter trIt(&fTransformations);
429  VariableTransformBase *trf;
430  if ((trf = (VariableTransformBase*) trIt())) {
431  name = TString(trf->GetShortName());
432  while ((trf = (VariableTransformBase*) trIt())) name += "_" + TString(trf->GetShortName());
433  }
434  return name;
435 }
436 
437 ////////////////////////////////////////////////////////////////////////////////
438 /// incorporates transformation type into title axis (usually for histograms)
439 
440 TString TMVA::TransformationHandler::GetVariableAxisTitle( const VariableInfo& info ) const
441 {
442  TString xtit = info.GetTitle();
443  // indicate transformation, but not in case of single identity transform
444  if (fTransformations.GetSize() >= 1) {
445  if (fTransformations.GetSize() > 1 ||
446  ((VariableTransformBase*)GetTransformationList().Last())->GetVariableTransform() != Types::kIdentity) {
447  xtit += " (" + GetName() + ")";
448  }
449  }
450  return xtit;
451 }
452 
453 
454 ////////////////////////////////////////////////////////////////////////////////
455 /// create histograms from the input variables
456 /// - histograms for all input variables
457 /// - scatter plots for all pairs of input variables
458 
459 void TMVA::TransformationHandler::PlotVariables (const std::vector<Event*>& events, TDirectory* theDirectory )
460 {
461  if (fRootBaseDir==0 && theDirectory == 0) return;
462 
463  Log() << kDEBUG << "Plot event variables for ";
464  if (theDirectory !=0) Log()<< TString(theDirectory->GetName()) << Endl;
465  else Log() << GetName() << Endl;
466 
467  // extension for transformation type
468  TString transfType = "";
469  if (theDirectory == 0) {
470  transfType += "_";
471  transfType += GetName();
472  }else{ // you plot for the individual classifiers. Note, here the "statistics" still need to be calculated as you are in the testing phase
473  CalcStats(events);
474  }
475 
476  const UInt_t nvar = fDataSetInfo.GetNVariables();
477  const UInt_t ntgt = fDataSetInfo.GetNTargets();
478  const Int_t ncls = fDataSetInfo.GetNClasses();
479 
480  // Create all histograms
481  // do both, scatter and profile plots
482  std::vector<std::vector<TH1*> > hVars( ncls ); // histograms for variables
483  std::vector<std::vector<std::vector<TH2F*> > > mycorr( ncls ); // histograms for correlations
484  std::vector<std::vector<std::vector<TProfile*> > > myprof( ncls ); // histograms for profiles
485 
486  for (Int_t cls = 0; cls < ncls; cls++) {
487  hVars.at(cls).resize ( nvar+ntgt );
488  hVars.at(cls).assign ( nvar+ntgt, 0 ); // fill with zeros
489  mycorr.at(cls).resize( nvar+ntgt );
490  myprof.at(cls).resize( nvar+ntgt );
491  for (UInt_t ivar=0; ivar < nvar+ntgt; ivar++) {
492  mycorr.at(cls).at(ivar).resize( nvar+ntgt );
493  myprof.at(cls).at(ivar).resize( nvar+ntgt );
494  mycorr.at(cls).at(ivar).assign( nvar+ntgt, 0 ); // fill with zeros
495  myprof.at(cls).at(ivar).assign( nvar+ntgt, 0 ); // fill with zeros
496  }
497  }
498 
499  // if there are too many input variables, the creation of correlations plots blows up
500  // memory and basically kills the TMVA execution
501  // --> avoid above critical number (which can be user defined)
502  if (nvar+ntgt > (UInt_t)gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots) {
503  Int_t nhists = (nvar+ntgt)*(nvar+ntgt - 1)/2;
504  Log() << kINFO << gTools().Color("dgreen") << Endl;
505  Log() << kINFO << "<PlotVariables> Will not produce scatter plots ==> " << Endl;
506  Log() << kINFO
507  << "| The number of " << nvar << " input variables and " << ntgt << " target values would require "
508  << nhists << " two-dimensional" << Endl;
509  Log() << kINFO
510  << "| histograms, which would occupy the computer's memory. Note that this" << Endl;
511  Log() << kINFO
512  << "| suppression does not have any consequences for your analysis, other" << Endl;
513  Log() << kINFO
514  << "| than not disposing of these scatter plots. You can modify the maximum" << Endl;
515  Log() << kINFO
516  << "| number of input variables allowed to generate scatter plots in your" << Endl;
517  Log() << "| script via the command line:" << Endl;
518  Log() << kINFO
519  << "| \"(TMVA::gConfig().GetVariablePlotting()).fMaxNumOfAllowedVariablesForScatterPlots = <some int>;\""
520  << gTools().Color("reset") << Endl;
521  Log() << Endl;
522  Log() << kINFO << "Some more output" << Endl;
523  }
524 
525  Double_t timesRMS = gConfig().GetVariablePlotting().fTimesRMS;
526  UInt_t nbins1D = gConfig().GetVariablePlotting().fNbins1D;
527  UInt_t nbins2D = gConfig().GetVariablePlotting().fNbins2D;
528 
529  for (UInt_t var_tgt = 0; var_tgt < 2; var_tgt++) { // create the histos first for the variables, then for the targets
530  UInt_t nloops = ( var_tgt == 0? nvar:ntgt ); // number of variables or number of targets
531  for (UInt_t ivar=0; ivar<nloops; ivar++) {
532  const VariableInfo& info = ( var_tgt == 0 ? Variable( ivar ) : Target(ivar) ); // choose the appropriate one (variable or target)
533  TString myVari = info.GetInternalName();
534 
535  Double_t mean = fVariableStats.at(fNumC-1).at( ( var_tgt*nvar )+ivar).fMean;
536  Double_t rms = fVariableStats.at(fNumC-1).at( ( var_tgt*nvar )+ivar).fRMS;
537 
538  for (Int_t cls = 0; cls < ncls; cls++) {
539 
540  TString className = fDataSetInfo.GetClassInfo(cls)->GetName();
541 
542  // add "target" in case of target variable (required for plotting macros)
543  className += (ntgt == 1 && var_tgt == 1 ? "_target" : "");
544 
545  // choose reasonable histogram ranges, by removing outliers
546  TH1* h = 0;
547  if (info.GetVarType() == 'I') {
548  // special treatment for integer variables
549  Int_t xmin = TMath::Nint( GetMin( ( var_tgt*nvar )+ivar) );
550  Int_t xmax = TMath::Nint( GetMax( ( var_tgt*nvar )+ivar) + 1 );
551  Int_t nbins = xmax - xmin;
552 
553  h = new TH1F( Form("%s__%s%s", myVari.Data(), className.Data(), transfType.Data()),
554  info.GetTitle(), nbins, xmin, xmax );
555  }
556  else {
557  Double_t xmin = TMath::Max( GetMin( ( var_tgt*nvar )+ivar), mean - timesRMS*rms );
558  Double_t xmax = TMath::Min( GetMax( ( var_tgt*nvar )+ivar), mean + timesRMS*rms );
559 
560  //std::cout << "Class="<<cls<<" xmin="<<xmin << " xmax="<<xmax<<" mean="<<mean<<" rms="<<rms<<" timesRMS="<<timesRMS<<std::endl;
561  // protection
562  if (xmin >= xmax) xmax = xmin*1.1; // try first...
563  if (xmin >= xmax) xmax = xmin + 1; // this if xmin == xmax == 0
564  // safety margin for values equal to the maximum within the histogram
565  xmax += (xmax - xmin)/nbins1D;
566 
567  h = new TH1F( Form("%s__%s%s", myVari.Data(), className.Data(), transfType.Data()),
568  info.GetTitle(), nbins1D, xmin, xmax );
569  }
570 
571  h->GetXaxis()->SetTitle( gTools().GetXTitleWithUnit( GetVariableAxisTitle( info ), info.GetUnit() ) );
572  h->GetYaxis()->SetTitle( gTools().GetYTitleWithUnit( *h, info.GetUnit(), kFALSE ) );
573  hVars.at(cls).at((var_tgt*nvar)+ivar) = h;
574 
575  // profile and scatter plots
576  if (nvar+ntgt <= (UInt_t)gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots) {
577 
578  for (UInt_t v_t = 0; v_t < 2; v_t++) {
579  UInt_t nl = ( v_t==0?nvar:ntgt );
580  UInt_t start = ( v_t==0? (var_tgt==0?ivar+1:0):(var_tgt==0?nl:ivar+1) );
581  for (UInt_t j=start; j<nl; j++) {
582  // choose the appropriate one (variable or target)
583  const VariableInfo& infoj = ( v_t == 0 ? Variable( j ) : Target(j) );
584  TString myVarj = infoj.GetInternalName();
585 
586  Double_t rxmin = fVariableStats.at(fNumC-1).at( ( v_t*nvar )+ivar).fMin;
587  Double_t rxmax = fVariableStats.at(fNumC-1).at( ( v_t*nvar )+ivar).fMax;
588  Double_t rymin = fVariableStats.at(fNumC-1).at( ( v_t*nvar )+j).fMin;
589  Double_t rymax = fVariableStats.at(fNumC-1).at( ( v_t*nvar )+j).fMax;
590 
591  // scatter plot
592  TH2F* h2 = new TH2F( Form( "scat_%s_vs_%s_%s%s" , myVarj.Data(), myVari.Data(),
593  className.Data(), transfType.Data() ),
594  Form( "%s versus %s (%s)%s", infoj.GetTitle(), info.GetTitle(),
595  className.Data(), transfType.Data() ),
596  nbins2D, rxmin , rxmax,
597  nbins2D, rymin , rymax );
598 
599  h2->GetXaxis()->SetTitle( gTools().GetXTitleWithUnit( GetVariableAxisTitle( info ), info .GetUnit() ) );
600  h2->GetYaxis()->SetTitle( gTools().GetXTitleWithUnit( GetVariableAxisTitle( infoj ), infoj.GetUnit() ) );
601  mycorr.at(cls).at((var_tgt*nvar)+ivar).at((v_t*nvar)+j) = h2;
602 
603  // profile plot
604  TProfile* p = new TProfile( Form( "prof_%s_vs_%s_%s%s", myVarj.Data(),
605  myVari.Data(), className.Data(),
606  transfType.Data() ),
607  Form( "profile %s versus %s (%s)%s",
608  infoj.GetTitle(), info.GetTitle(),
609  className.Data(), transfType.Data() ), nbins1D,
610  rxmin, rxmax );
611  // info.GetMin(), info.GetMax() );
612 
613  p->GetXaxis()->SetTitle( gTools().GetXTitleWithUnit( GetVariableAxisTitle( info ), info .GetUnit() ) );
614  p->GetYaxis()->SetTitle( gTools().GetXTitleWithUnit( GetVariableAxisTitle( infoj ), infoj.GetUnit() ) );
615  myprof.at(cls).at((var_tgt*nvar)+ivar).at((v_t*nvar)+j) = p;
616  }
617  }
618  }
619  }
620  }
621  }
622 
623  UInt_t nevts = events.size();
624 
625  // compute correlation coefficient between target value and variables (regression only)
626  std::vector<Double_t> xregmean ( nvar+1, 0 );
627  std::vector<Double_t> x2regmean( nvar+1, 0 );
628  std::vector<Double_t> xCregmean( nvar+1, 0 );
629 
630  // fill the histograms (this approach should be faster than individual projection
631  for (UInt_t ievt=0; ievt<nevts; ievt++) {
632 
633  const Event* ev = events[ievt];
634 
635  Float_t weight = ev->GetWeight();
636  Int_t cls = ev->GetClass();
637 
638  // average correlation between first target and variables (so far only for single-target regression)
639  if (ntgt == 1) {
640  Float_t valr = ev->GetTarget(0);
641  xregmean[nvar] += valr;
642  x2regmean[nvar] += valr*valr;
643  for (UInt_t ivar=0; ivar<nvar; ivar++) {
644  Float_t vali = ev->GetValue(ivar);
645  xregmean[ivar] += vali;
646  x2regmean[ivar] += vali*vali;
647  xCregmean[ivar] += vali*valr;
648  }
649  }
650 
651  // fill correlation histograms
652  for (UInt_t var_tgt = 0; var_tgt < 2; var_tgt++) { // create the histos first for the variables, then for the targets
653  UInt_t nloops = ( var_tgt == 0? nvar:ntgt ); // number of variables or number of targets
654  for (UInt_t ivar=0; ivar<nloops; ivar++) {
655  Float_t vali = ( var_tgt == 0 ? ev->GetValue(ivar) : ev->GetTarget(ivar) );
656 
657  // variable histos
658  hVars.at(cls).at( ( var_tgt*nvar )+ivar)->Fill( vali, weight );
659 
660  // correlation histos
661  if (nvar+ntgt <= (UInt_t)gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots) {
662 
663  for (UInt_t v_t = 0; v_t < 2; v_t++) {
664  UInt_t nl = ( v_t==0 ? nvar : ntgt );
665  UInt_t start = ( v_t==0 ? (var_tgt==0?ivar+1:0) : (var_tgt==0?nl:ivar+1) );
666  for (UInt_t j=start; j<nl; j++) {
667  Float_t valj = ( v_t == 0 ? ev->GetValue(j) : ev->GetTarget(j) );
668  mycorr.at(cls).at( ( var_tgt*nvar )+ivar).at( ( v_t*nvar )+j)->Fill( vali, valj, weight );
669  myprof.at(cls).at( ( var_tgt*nvar )+ivar).at( ( v_t*nvar )+j)->Fill( vali, valj, weight );
670  }
671  }
672  }
673  }
674  }
675  }
676 
677  // correlation analysis for ranking (single-target regression only)
678  if (ntgt == 1) {
679  for (UInt_t ivar=0; ivar<=nvar; ivar++) {
680  xregmean[ivar] /= nevts;
681  x2regmean[ivar] = x2regmean[ivar]/nevts - xregmean[ivar]*xregmean[ivar];
682  }
683  for (UInt_t ivar=0; ivar<nvar; ivar++) {
684  xCregmean[ivar] = xCregmean[ivar]/nevts - xregmean[ivar]*xregmean[nvar];
685  xCregmean[ivar] /= TMath::Sqrt( x2regmean[ivar]*x2regmean[nvar] );
686  }
687 
688  fRanking.push_back( new Ranking( GetName() + "Transformation", "|Correlation with target|" ) );
689  for (UInt_t ivar=0; ivar<nvar; ivar++) {
690  Double_t abscor = TMath::Abs( xCregmean[ivar] );
691  fRanking.back()->AddRank( Rank( fDataSetInfo.GetVariableInfo(ivar).GetLabel(), abscor ) );
692  }
693 
694  if (nvar+ntgt <= (UInt_t)gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots) {
695 
696  // compute also mutual information (non-linear correlation measure)
697  fRanking.push_back( new Ranking( GetName() + "Transformation", "Mutual information" ) );
698  for (UInt_t ivar=0; ivar<nvar; ivar++) {
699  TH2F* h1 = mycorr.at(0).at( nvar ).at( ivar );
700  Double_t mi = gTools().GetMutualInformation( *h1 );
701  fRanking.back()->AddRank( Rank( fDataSetInfo.GetVariableInfo(ivar).GetLabel(), mi ) );
702  }
703 
704  // compute correlation ratio (functional correlations measure)
705  fRanking.push_back( new Ranking( GetName() + "Transformation", "Correlation Ratio" ) );
706  for (UInt_t ivar=0; ivar<nvar; ivar++) {
707  TH2F* h2 = mycorr.at(0).at( nvar ).at( ivar );
708  Double_t cr = gTools().GetCorrelationRatio( *h2 );
709  fRanking.back()->AddRank( Rank( fDataSetInfo.GetVariableInfo(ivar).GetLabel(), cr ) );
710  }
711 
712  // additionally compute correlation ratio from transposed histograms since correlation ratio is asymmetric
713  fRanking.push_back( new Ranking( GetName() + "Transformation", "Correlation Ratio (T)" ) );
714  for (UInt_t ivar=0; ivar<nvar; ivar++) {
715  TH2F* h2T = gTools().TransposeHist( *mycorr.at(0).at( nvar ).at( ivar ) );
716  Double_t cr = gTools().GetCorrelationRatio( *h2T );
717  fRanking.back()->AddRank( Rank( fDataSetInfo.GetVariableInfo(ivar).GetLabel(), cr ) );
718  delete h2T;
719  }
720  }
721  }
722  // computes ranking of input variables
723  // separation for 2-class classification
724  else if (fDataSetInfo.GetNClasses() == 2
725  && fDataSetInfo.GetClassInfo("Signal") != NULL
726  && fDataSetInfo.GetClassInfo("Background") != NULL
727  ) { // TODO: ugly hack.. adapt to new framework
728  fRanking.push_back( new Ranking( GetName() + "Transformation", "Separation" ) );
729  for (UInt_t i=0; i<nvar; i++) {
730  Double_t sep = gTools().GetSeparation( hVars.at(fDataSetInfo.GetClassInfo("Signal") ->GetNumber()).at(i),
731  hVars.at(fDataSetInfo.GetClassInfo("Background")->GetNumber()).at(i) );
732  fRanking.back()->AddRank( Rank( hVars.at(fDataSetInfo.GetClassInfo("Signal")->GetNumber()).at(i)->GetTitle(),
733  sep ) );
734  }
735  }
736 
737  // for regression compute performance from correlation with target value
738 
739  // write histograms
740 
741  TDirectory* localDir = theDirectory;
742  if (theDirectory == 0) {
743  // create directory in root dir
744  fRootBaseDir->cd();
745  TString outputDir = TString("InputVariables");
746  TListIter trIt(&fTransformations);
747  while (VariableTransformBase *trf = (VariableTransformBase*) trIt())
748  outputDir += "_" + TString(trf->GetShortName());
749 
750  TString uniqueOutputDir = outputDir;
751  Int_t counter = 0;
752  TObject* o = NULL;
753  while( (o = fRootBaseDir->FindObject(uniqueOutputDir)) != 0 ){
754  uniqueOutputDir = outputDir+Form("_%d",counter);
755  Log() << kINFO << "A " << o->ClassName() << " with name " << o->GetName() << " already exists in "
756  << fRootBaseDir->GetPath() << ", I will try with "<<uniqueOutputDir<<"." << Endl;
757  ++counter;
758  }
759 
760  // TObject* o = fRootBaseDir->FindObject(outputDir);
761  // if (o != 0) {
762  // Log() << kFATAL << "A " << o->ClassName() << " with name " << o->GetName() << " already exists in "
763  // << fRootBaseDir->GetPath() << "("<<outputDir<<")" << Endl;
764  // }
765  localDir = fRootBaseDir->mkdir( uniqueOutputDir );
766  localDir->cd();
767 
768  Log() << kVERBOSE << "Create and switch to directory " << localDir->GetPath() << Endl;
769  }
770  else {
771  theDirectory->cd();
772  }
773 
774  for (UInt_t i=0; i<nvar+ntgt; i++) {
775  for (Int_t cls = 0; cls < ncls; cls++) {
776  if (hVars.at(cls).at(i) != 0) {
777  hVars.at(cls).at(i)->Write();
778  hVars.at(cls).at(i)->SetDirectory(0);
779  delete hVars.at(cls).at(i);
780  }
781  }
782  }
783 
784  // correlation plots have dedicated directory
785  if (nvar+ntgt <= (UInt_t)gConfig().GetVariablePlotting().fMaxNumOfAllowedVariablesForScatterPlots) {
786 
787  localDir = localDir->mkdir( "CorrelationPlots" );
788  localDir ->cd();
789  Log() << kDEBUG << "Create scatter and profile plots in target-file directory: " << Endl;
790  Log() << kDEBUG << localDir->GetPath() << Endl;
791 
792 
793  for (UInt_t i=0; i<nvar+ntgt; i++) {
794  for (UInt_t j=i+1; j<nvar+ntgt; j++) {
795  for (Int_t cls = 0; cls < ncls; cls++) {
796  if (mycorr.at(cls).at(i).at(j) != 0 ) {
797  mycorr.at(cls).at(i).at(j)->Write();
798  mycorr.at(cls).at(i).at(j)->SetDirectory(0);
799  delete mycorr.at(cls).at(i).at(j);
800  }
801  if (myprof.at(cls).at(i).at(j) != 0) {
802  myprof.at(cls).at(i).at(j)->Write();
803  myprof.at(cls).at(i).at(j)->SetDirectory(0);
804  delete myprof.at(cls).at(i).at(j);
805  }
806  }
807  }
808  }
809  }
810  if (theDirectory != 0 ) theDirectory->cd();
811  else fRootBaseDir->cd();
812 }
813 
814 ////////////////////////////////////////////////////////////////////////////////
815 /// returns string for transformation
816 
817 std::vector<TString>* TMVA::TransformationHandler::GetTransformationStringsOfLastTransform() const
818 {
819  VariableTransformBase* trf = ((VariableTransformBase*)GetTransformationList().Last());
820  if (!trf) return 0;
821  else return trf->GetTransformationStrings( fTransformationsReferenceClasses.back() );
822 }
823 
824 ////////////////////////////////////////////////////////////////////////////////
825 /// returns string for transformation
826 
827 const char* TMVA::TransformationHandler::GetNameOfLastTransform() const
828 {
829  VariableTransformBase* trf = ((VariableTransformBase*)GetTransformationList().Last());
830  if (!trf) return 0;
831  else return trf->GetName();
832 }
833 
834 ////////////////////////////////////////////////////////////////////////////////
835 /// write transformation to stream
836 
837 void TMVA::TransformationHandler::WriteToStream( std::ostream& o ) const
838 {
839  TListIter trIt(&fTransformations);
840  std::vector< Int_t >::const_iterator rClsIt = fTransformationsReferenceClasses.begin();
841 
842  o << "NTransformtations " << fTransformations.GetSize() << std::endl << std::endl;
843 
844  ClassInfo* ci;
845  UInt_t i = 1;
846  while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) {
847  o << "#TR -*-*-*-*-*-*-* transformation " << i++ << ": " << trf->GetName() << " -*-*-*-*-*-*-*-" << std::endl;
848  trf->WriteTransformationToStream(o);
849  ci = fDataSetInfo.GetClassInfo( (*rClsIt) );
850  TString clsName;
851  if (ci == 0 ) clsName = "AllClasses";
852  else clsName = ci->GetName();
853  o << "ReferenceClass " << clsName << std::endl;
854  ++rClsIt;
855  }
856 }
857 
858 
859 ////////////////////////////////////////////////////////////////////////////////
860 /// XML node describing the transformation
861 
862 void TMVA::TransformationHandler::AddXMLTo( void* parent ) const
863 {
864  if(!parent) return;
865  void* trfs = gTools().AddChild(parent, "Transformations");
866  gTools().AddAttr( trfs, "NTransformations", fTransformations.GetSize() );
867  TListIter trIt(&fTransformations);
868  while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) trf->AttachXMLTo(trfs);
869 }
870 
871 ////////////////////////////////////////////////////////////////////////////////
872 
873 void TMVA::TransformationHandler::ReadFromStream( std::istream& )
874 {
875  Log() << kFATAL << "Read transformations not implemented" << Endl;
876  // TODO
877 }
878 
879 ////////////////////////////////////////////////////////////////////////////////
880 
881 void TMVA::TransformationHandler::ReadFromXML( void* trfsnode )
882 {
883  void* ch = gTools().GetChild( trfsnode );
884  while(ch) {
885  Int_t idxCls = -1;
886  TString trfname;
887  gTools().ReadAttr(ch, "Name", trfname);
888 
889  VariableTransformBase* newtrf = 0;
890 
891  if (trfname == "Decorrelation" ) {
892  newtrf = new VariableDecorrTransform(fDataSetInfo);
893  }
894  else if (trfname == "PCA" ) {
895  newtrf = new VariablePCATransform(fDataSetInfo);
896  }
897  else if (trfname == "Gauss" ) {
898  newtrf = new VariableGaussTransform(fDataSetInfo);
899  }
900  else if (trfname == "Uniform" ) {
901  newtrf = new VariableGaussTransform(fDataSetInfo, "Uniform");
902  }
903  else if (trfname == "Normalize" ) {
904  newtrf = new VariableNormalizeTransform(fDataSetInfo);
905  }
906  else if (trfname == "Rearrange" ) {
907  newtrf = new VariableRearrangeTransform(fDataSetInfo);
908  }
909  else if (trfname != "None") {
910  }
911  else {
912  Log() << kFATAL << "<ReadFromXML> Variable transform '"
913  << trfname << "' unknown." << Endl;
914  }
915  newtrf->ReadFromXML( ch );
916  AddTransformation( newtrf, idxCls );
917  ch = gTools().GetNextChild(ch);
918  }
919 }
920 
921 ////////////////////////////////////////////////////////////////////////////////
922 /// prints ranking of input variables
923 
924 void TMVA::TransformationHandler::PrintVariableRanking() const
925 {
926  //Log() << kINFO << " " << Endl;
927  Log() << kINFO << "Ranking input variables (method unspecific)..." << Endl;
928  std::vector<Ranking*>::const_iterator it = fRanking.begin();
929  for (; it != fRanking.end(); ++it) (*it)->Print();
930 }
931 
932 ////////////////////////////////////////////////////////////////////////////////
933 
934 Double_t TMVA::TransformationHandler::GetMean( Int_t ivar, Int_t cls ) const
935 {
936  try {
937  return fVariableStats.at(cls).at(ivar).fMean;
938  }
939  catch(...) {
940  try {
941  return fVariableStats.at(fNumC-1).at(ivar).fMean;
942  }
943  catch(...) {
944  Log() << kWARNING << "Inconsistent variable state when reading the mean value. " << Endl;
945  }
946  }
947  Log() << kWARNING << "Inconsistent variable state when reading the mean value. Value 0 given back" << Endl;
948  return 0;
949 }
950 
951 ////////////////////////////////////////////////////////////////////////////////
952 
953 Double_t TMVA::TransformationHandler::GetRMS( Int_t ivar, Int_t cls ) const
954 {
955  try {
956  return fVariableStats.at(cls).at(ivar).fRMS;
957  }
958  catch(...) {
959  try {
960  return fVariableStats.at(fNumC-1).at(ivar).fRMS;
961  }
962  catch(...) {
963  Log() << kWARNING << "Inconsistent variable state when reading the RMS value. " << Endl;
964  }
965  }
966  Log() << kWARNING << "Inconsistent variable state when reading the RMS value. Value 0 given back" << Endl;
967  return 0;
968 }
969 
970 ////////////////////////////////////////////////////////////////////////////////
971 
972 Double_t TMVA::TransformationHandler::GetMin( Int_t ivar, Int_t cls ) const
973 {
974  try {
975  return fVariableStats.at(cls).at(ivar).fMin;
976  }
977  catch(...) {
978  try {
979  return fVariableStats.at(fNumC-1).at(ivar).fMin;
980  }
981  catch(...) {
982  Log() << kWARNING << "Inconsistent variable state when reading the minimum value. " << Endl;
983  }
984  }
985  Log() << kWARNING << "Inconsistent variable state when reading the minimum value. Value 0 given back" << Endl;
986  return 0;
987 }
988 
989 ////////////////////////////////////////////////////////////////////////////////
990 
991 Double_t TMVA::TransformationHandler::GetMax( Int_t ivar, Int_t cls ) const
992 {
993  try {
994  return fVariableStats.at(cls).at(ivar).fMax;
995  }
996  catch(...) {
997  try {
998  return fVariableStats.at(fNumC-1).at(ivar).fMax;
999  }
1000  catch(...) {
1001  Log() << kWARNING << "Inconsistent variable state when reading the maximum value. " << Endl;
1002  }
1003  }
1004  Log() << kWARNING << "Inconsistent variable state when reading the maximum value. Value 0 given back" << Endl;
1005  return 0;
1006 }