Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
VariableGaussTransform.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Eckhard v. Toerne
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : VariableGaussTransform *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Eckhard v. Toerne <evt@uni-bonn.de> - Uni Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * U. of Bonn, Germany *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * (http://tmva.sourceforge.net/LICENSE) *
28  **********************************************************************************/
29 
30 /*! \class TMVA::VariableGaussTransform
31 \ingroup TMVA
32 Gaussian Transformation of input variables.
33 */
34 
36 
37 #include "TMVA/DataSetInfo.h"
38 #include "TMVA/MsgLogger.h"
39 #include "TMVA/PDF.h"
40 #include "TMVA/Tools.h"
41 #include "TMVA/Types.h"
42 #include "TMVA/Version.h"
43 
44 #include "TCanvas.h"
45 #include "TGraph.h"
46 #include "TH1F.h"
47 #include "TMath.h"
48 #include "TVectorF.h"
49 #include "TVectorD.h"
50 
51 #include <exception>
52 #include <iostream>
53 #include <iomanip>
54 #include <list>
55 #include <limits>
56 #include <stdexcept>
57 
58 ClassImp(TMVA::VariableGaussTransform);
59 
60 ////////////////////////////////////////////////////////////////////////////////
61 /// constructor
62 /// can only be applied one after the other when they are created. But in order to
63 /// determine the Gauss transformation
64 
65 TMVA::VariableGaussTransform::VariableGaussTransform( DataSetInfo& dsi, TString strcor )
66 : VariableTransformBase( dsi, Types::kGauss, "Gauss" ),
67  fFlatNotGauss(kFALSE),
68  fPdfMinSmooth(0),
69  fPdfMaxSmooth(0),
70  fElementsperbin(0)
71 {
72  if (strcor=="Uniform") {fFlatNotGauss = kTRUE;
73  SetName("Uniform");
74  }
75 }
76 
77 ////////////////////////////////////////////////////////////////////////////////
78 /// destructor
79 
80 TMVA::VariableGaussTransform::~VariableGaussTransform( void )
81 {
82  CleanUpCumulativeArrays();
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 
87 void TMVA::VariableGaussTransform::Initialize()
88 {
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// calculate the cumulative distributions
93 
94 Bool_t TMVA::VariableGaussTransform::PrepareTransformation (const std::vector<Event*>& events)
95 {
96  Initialize();
97 
98  if (!IsEnabled() || IsCreated()) return kTRUE;
99 
100  Log() << kINFO << "Preparing the Gaussian transformation..." << Endl;
101 
102  UInt_t inputSize = fGet.size();
103  SetNVariables(inputSize);
104 
105  if (inputSize > 200) {
106  Log() << kWARNING << "----------------------------------------------------------------------------"
107  << Endl;
108  Log() << kWARNING
109  << ": More than 200 variables, I hope you have enough memory!!!!" << Endl;
110  Log() << kWARNING << "----------------------------------------------------------------------------"
111  << Endl;
112  // return kFALSE;
113  }
114 
115  GetCumulativeDist( events );
116 
117  SetCreated( kTRUE );
118 
119  return kTRUE;
120 }
121 
122 ////////////////////////////////////////////////////////////////////////////////
123 /// apply the Gauss transformation
124 
125 const TMVA::Event* TMVA::VariableGaussTransform::Transform(const Event* const ev, Int_t cls ) const
126 {
127  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
128  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
129  //EVT if (cls <0 || cls > GetNClasses() ) {
130  //EVT cls = GetNClasses();
131  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
132  //EVT}
133  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
134  //EVT workaround end
135 
136  // get the variable vector of the current event
137  UInt_t inputSize = fGet.size();
138 
139  std::vector<Float_t> input(0);
140  std::vector<Float_t> output(0);
141 
142  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
143  GetInput( ev, input, mask );
144 
145  std::vector<Char_t>::iterator itMask = mask.begin();
146 
147  // TVectorD vec( inputSize );
148  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
149  Double_t cumulant;
150  //transformation
151  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
152 
153  if ( (*itMask) ){
154  ++itMask;
155  continue;
156  }
157 
158  if (0 != fCumulativePDF[ivar][cls]) {
159  // first make it flat
160  if(fTMVAVersion>TMVA_VERSION(3,9,7))
161  cumulant = (fCumulativePDF[ivar][cls])->GetVal(input.at(ivar));
162  else
163  cumulant = OldCumulant(input.at(ivar), fCumulativePDF[ivar][cls]->GetOriginalHist() );
164  cumulant = TMath::Min(cumulant,1.-10e-10);
165  cumulant = TMath::Max(cumulant,0.+10e-10);
166 
167  if (fFlatNotGauss)
168  output.push_back( cumulant );
169  else {
170  // sanity correction for out-of-range values
171  Double_t maxErfInvArgRange = 0.99999999;
172  Double_t arg = 2.0*cumulant - 1.0;
173  arg = TMath::Min(+maxErfInvArgRange,arg);
174  arg = TMath::Max(-maxErfInvArgRange,arg);
175 
176  output.push_back( 1.414213562*TMath::ErfInverse(arg) );
177  }
178  }
179  }
180 
181  if (fTransformedEvent==0 || fTransformedEvent->GetNVariables()!=ev->GetNVariables()) {
182  if (fTransformedEvent!=0) { delete fTransformedEvent; fTransformedEvent = 0; }
183  fTransformedEvent = new Event();
184  }
185 
186  SetOutput( fTransformedEvent, output, mask, ev );
187 
188  return fTransformedEvent;
189 }
190 
191 ////////////////////////////////////////////////////////////////////////////////
192 /// apply the inverse Gauss or inverse uniform transformation
193 
194 const TMVA::Event* TMVA::VariableGaussTransform::InverseTransform(const Event* const ev, Int_t cls ) const
195 {
196  if (!IsCreated()) Log() << kFATAL << "Transformation not yet created" << Endl;
197  //EVT this is a workaround to address the reader problem with transforma and EvaluateMVA(std::vector<float/double> ,...)
198  //EVT if (cls <0 || cls > GetNClasses() ) {
199  //EVT cls = GetNClasses();
200  //EVT if (GetNClasses() == 1 ) cls = (fCumulativePDF[0].size()==1?0:2);
201  //EVT}
202  if (cls <0 || cls >= (int) fCumulativePDF[0].size()) cls = fCumulativePDF[0].size()-1;
203  //EVT workaround end
204 
205  // get the variable vector of the current event
206  UInt_t inputSize = fGet.size();
207 
208  std::vector<Float_t> input(0);
209  std::vector<Float_t> output(0);
210 
211  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
212  GetInput( ev, input, mask, kTRUE );
213 
214  std::vector<Char_t>::iterator itMask = mask.begin();
215 
216  // TVectorD vec( inputSize );
217  // for (UInt_t ivar=0; ivar<inputSize; ivar++) vec(ivar) = input.at(ivar);
218  Double_t invCumulant;
219  //transformation
220  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
221 
222  if ( (*itMask) ){
223  ++itMask;
224  continue;
225  }
226 
227  if (0 != fCumulativePDF[ivar][cls]) {
228  invCumulant = input.at(ivar);
229 
230  // first de-gauss ist if gaussianized
231  if (!fFlatNotGauss)
232  invCumulant = (TMath::Erf(invCumulant/1.414213562)+1)/2.f;
233 
234  // then de-uniform the values
235  if(fTMVAVersion>TMVA_VERSION(4,0,0))
236  invCumulant = (fCumulativePDF[ivar][cls])->GetValInverse(invCumulant,kTRUE);
237  else
238  Log() << kFATAL << "Inverse Uniform/Gauss transformation not implemented for TMVA versions before 4.1.0" << Endl;
239 
240  output.push_back(invCumulant);
241  }
242  }
243 
244  if (fBackTransformedEvent==0) fBackTransformedEvent = new Event( *ev );
245 
246  SetOutput( fBackTransformedEvent, output, mask, ev, kTRUE );
247 
248  return fBackTransformedEvent;
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
252 /// fill the cumulative distributions
253 
254 void TMVA::VariableGaussTransform::GetCumulativeDist( const std::vector< Event*>& events )
255 {
256  const UInt_t inputSize = fGet.size();
257  // const UInt_t nCls = GetNClasses();
258 
259  // const UInt_t nvar = GetNVariables();
260  UInt_t nevt = events.size();
261 
262  const UInt_t nClasses = GetNClasses();
263  UInt_t numDist = nClasses+1; // calculate cumulative distributions for all "event" classes separately + one where all classes are treated (added) together
264 
265  if (GetNClasses() == 1 ) numDist = nClasses; // for regression, if there is only one class, there is no "sum" of classes, hence
266 
267  UInt_t **nbins = new UInt_t*[numDist];
268 
269  std::list< TMVA::TMVAGaussPair > **listsForBinning = new std::list<TMVA::TMVAGaussPair>* [numDist];
270  std::vector< Float_t > **vsForBinning = new std::vector<Float_t>* [numDist];
271  for (UInt_t i=0; i < numDist; i++) {
272  listsForBinning[i] = new std::list<TMVA::TMVAGaussPair> [inputSize];
273  vsForBinning[i] = new std::vector<Float_t> [inputSize];
274  nbins[i] = new UInt_t[inputSize]; // nbins[0] = number of bins for signal distributions. It depends on the number of entries, thus it's the same for all the input variables, but it isn't necessary for some "weird" reason.
275  }
276 
277  std::vector<Float_t> input;
278  std::vector<Char_t> mask; // entries with kTRUE must not be transformed
279 
280  // perform event loop
281  Float_t *sumOfWeights = new Float_t[numDist];
282  Float_t *minWeight = new Float_t[numDist];
283  Float_t *maxWeight = new Float_t[numDist];
284  for (UInt_t i=0; i<numDist; i++) {
285  sumOfWeights[i]=0;
286  minWeight[i]=10E10; // TODO: change this to std::max ?
287  maxWeight[i]=0; // QUESTION: wouldn't there be negative events possible?
288  }
289  for (UInt_t ievt=0; ievt < nevt; ievt++) {
290  const Event* ev= events[ievt];
291  Int_t cls = ev->GetClass();
292  Float_t eventWeight = ev->GetWeight();
293  sumOfWeights[cls] += eventWeight;
294  if (minWeight[cls] > eventWeight) minWeight[cls]=eventWeight;
295  if (maxWeight[cls] < eventWeight) maxWeight[cls]=eventWeight;
296  if (numDist>1) sumOfWeights[numDist-1] += eventWeight;
297 
298  Bool_t hasMaskedEntries = GetInput( ev, input, mask );
299  if( hasMaskedEntries ){
300  Log() << kWARNING << "Incomplete event" << Endl;
301  std::ostringstream oss;
302  ev->Print(oss);
303  Log() << oss.str();
304  Log() << kFATAL << "Targets or variables masked by transformation. Apparently (a) value(s) is/are missing in this event." << Endl;
305  }
306 
307 
308  Int_t ivar = 0;
309  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
310  Float_t value = (*itInput);
311  listsForBinning[cls][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
312  if (numDist>1)listsForBinning[numDist-1][ivar].push_back(TMVA::TMVAGaussPair(value,eventWeight));
313  ++ivar;
314  }
315  }
316  if (numDist > 1) {
317  for (UInt_t icl=0; icl<numDist-1; icl++){
318  minWeight[numDist-1] = TMath::Min(minWeight[icl],minWeight[numDist-1]);
319  maxWeight[numDist-1] = TMath::Max(maxWeight[icl],maxWeight[numDist-1]);
320  }
321  }
322 
323  // Sorting the lists, getting nbins ...
324  const UInt_t nevmin=10; // minimum number of events per bin (to make sure we get reasonable distributions)
325  const UInt_t nbinsmax=2000; // maximum number of bins
326 
327  for (UInt_t icl=0; icl< numDist; icl++){
328  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
329  listsForBinning[icl][ivar].sort();
330  std::list< TMVA::TMVAGaussPair >::iterator it;
331  Float_t sumPerBin = sumOfWeights[icl]/nbinsmax;
332  sumPerBin=TMath::Max(minWeight[icl]*nevmin,sumPerBin);
333  Float_t sum=0;
334  Float_t ev_value=listsForBinning[icl][ivar].begin()->GetValue();
335  Float_t lastev_value=ev_value;
336  const Float_t eps = 1.e-4;
337  vsForBinning[icl][ivar].push_back(ev_value-eps);
338  vsForBinning[icl][ivar].push_back(ev_value);
339 
340  for (it=listsForBinning[icl][ivar].begin(); it != listsForBinning[icl][ivar].end(); ++it){
341  sum+= it->GetWeight();
342  if (sum >= sumPerBin) {
343  ev_value=it->GetValue();
344  if (ev_value>lastev_value) { // protection against bin width of 0
345  vsForBinning[icl][ivar].push_back(ev_value);
346  sum = 0.;
347  lastev_value=ev_value;
348  }
349  }
350  }
351  if (sum!=0) vsForBinning[icl][ivar].push_back(listsForBinning[icl][ivar].back().GetValue());
352  nbins[icl][ivar] = vsForBinning[icl][ivar].size();
353  }
354  }
355 
356  delete[] sumOfWeights;
357  delete[] minWeight;
358  delete[] maxWeight;
359 
360  // create histogram for the cumulative distribution.
361  fCumulativeDist.resize(inputSize);
362  for (UInt_t icls = 0; icls < numDist; icls++) {
363  for (UInt_t ivar=0; ivar < inputSize; ivar++){
364  Float_t* binnings = new Float_t[nbins[icls][ivar]];
365  //the binning for this particular histogram:
366  for (UInt_t k =0 ; k < nbins[icls][ivar]; k++){
367  binnings[k] = vsForBinning[icls][ivar][k];
368  }
369  fCumulativeDist[ivar].resize(numDist);
370  if (0 != fCumulativeDist[ivar][icls] ) {
371  delete fCumulativeDist[ivar][icls];
372  }
373  fCumulativeDist[ivar][icls] = new TH1F(Form("Cumulative_Var%d_cls%d",ivar,icls),
374  Form("Cumulative_Var%d_cls%d",ivar,icls),
375  nbins[icls][ivar] -1, // class icls
376  binnings);
377  fCumulativeDist[ivar][icls]->SetDirectory(0);
378  delete [] binnings;
379  }
380  }
381 
382  // Deallocation
383  for (UInt_t i=0; i<numDist; i++) {
384  delete [] listsForBinning[numDist-i-1];
385  delete [] vsForBinning[numDist-i-1];
386  delete [] nbins[numDist-i-1];
387  }
388  delete [] listsForBinning;
389  delete [] vsForBinning;
390  delete [] nbins;
391 
392  // perform event loop
393  std::vector<Int_t> ic(numDist);
394  for (UInt_t ievt=0; ievt<nevt; ievt++) {
395 
396  const Event* ev= events[ievt];
397  Int_t cls = ev->GetClass();
398  Float_t eventWeight = ev->GetWeight();
399 
400  GetInput( ev, input, mask );
401 
402  Int_t ivar = 0;
403  for( std::vector<Float_t>::iterator itInput = input.begin(), itInputEnd = input.end(); itInput != itInputEnd; ++itInput ) {
404  Float_t value = (*itInput);
405  fCumulativeDist[ivar][cls]->Fill(value,eventWeight);
406  if (numDist>1) fCumulativeDist[ivar][numDist-1]->Fill(value,eventWeight);
407 
408  ++ivar;
409  }
410  }
411 
412  // clean up
413  CleanUpCumulativeArrays("PDF");
414 
415  // now sum up in order to get the real cumulative distribution
416  Double_t sum = 0, total=0;
417  fCumulativePDF.resize(inputSize);
418  for (UInt_t ivar=0; ivar<inputSize; ivar++) {
419  // fCumulativePDF.resize(ivar+1);
420  for (UInt_t icls=0; icls<numDist; icls++) {
421  (fCumulativeDist[ivar][icls])->Smooth();
422  sum = 0;
423  total = 0.;
424  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
425  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
426  if (val>0) total += val;
427  }
428  for (Int_t ibin=1, ibinEnd=fCumulativeDist[ivar][icls]->GetNbinsX(); ibin <=ibinEnd ; ibin++){
429  Float_t val = (fCumulativeDist[ivar][icls])->GetBinContent(ibin);
430  if (val>0) sum += val;
431  (fCumulativeDist[ivar][icls])->SetBinContent(ibin,sum/total);
432  }
433  // create PDf
434  fCumulativePDF[ivar].push_back(new PDF( Form("GaussTransform var%d cls%d",ivar,icls), fCumulativeDist[ivar][icls], PDF::kSpline1, fPdfMinSmooth, fPdfMaxSmooth,kFALSE,kFALSE));
435  }
436  }
437 }
438 
439 ////////////////////////////////////////////////////////////////////////////////
440 
441 void TMVA::VariableGaussTransform::WriteTransformationToStream( std::ostream& ) const
442 {
443  Log() << kFATAL << "VariableGaussTransform::WriteTransformationToStream is obsolete" << Endl;
444 }
445 
446 ////////////////////////////////////////////////////////////////////////////////
447 /// clean up of cumulative arrays
448 
449 void TMVA::VariableGaussTransform::CleanUpCumulativeArrays(TString opt) {
450  if (opt == "ALL" || opt == "PDF"){
451  for (UInt_t ivar=0; ivar<fCumulativePDF.size(); ivar++) {
452  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++) {
453  if (0 != fCumulativePDF[ivar][icls]) delete fCumulativePDF[ivar][icls];
454  }
455  }
456  fCumulativePDF.clear();
457  }
458  if (opt == "ALL" || opt == "Dist"){
459  for (UInt_t ivar=0; ivar<fCumulativeDist.size(); ivar++) {
460  for (UInt_t icls=0; icls<fCumulativeDist[ivar].size(); icls++) {
461  if (0 != fCumulativeDist[ivar][icls]) delete fCumulativeDist[ivar][icls];
462  }
463  }
464  fCumulativeDist.clear();
465  }
466 }
467 ////////////////////////////////////////////////////////////////////////////////
468 /// create XML description of Gauss transformation
469 
470 void TMVA::VariableGaussTransform::AttachXMLTo(void* parent) {
471  void* trfxml = gTools().AddChild(parent, "Transform");
472  gTools().AddAttr(trfxml, "Name", "Gauss");
473  gTools().AddAttr(trfxml, "FlatOrGauss", (fFlatNotGauss?"Flat":"Gauss") );
474 
475  VariableTransformBase::AttachXMLTo( trfxml );
476 
477  UInt_t nvar = fGet.size();
478  for (UInt_t ivar=0; ivar<nvar; ivar++) {
479  void* varxml = gTools().AddChild( trfxml, "Variable");
480  // gTools().AddAttr( varxml, "Name", Variables()[ivar].GetLabel() );
481  gTools().AddAttr( varxml, "VarIndex", ivar );
482 
483  if ( fCumulativePDF[ivar][0]==0 ||
484  (fCumulativePDF[ivar].size()>1 && fCumulativePDF[ivar][1]==0 ))
485  Log() << kFATAL << "Cumulative histograms for variable " << ivar << " don't exist, can't write it to weight file" << Endl;
486 
487  for (UInt_t icls=0; icls<fCumulativePDF[ivar].size(); icls++){
488  void* pdfxml = gTools().AddChild( varxml, Form("CumulativePDF_cls%d",icls));
489  (fCumulativePDF[ivar][icls])->AddXMLTo(pdfxml);
490  }
491  }
492 }
493 
494 ////////////////////////////////////////////////////////////////////////////////
495 /// Read the transformation matrices from the xml node
496 
497 void TMVA::VariableGaussTransform::ReadFromXML( void* trfnode ) {
498  // clean up first
499  CleanUpCumulativeArrays();
500  TString FlatOrGauss;
501 
502  gTools().ReadAttr(trfnode, "FlatOrGauss", FlatOrGauss );
503 
504  if (FlatOrGauss == "Flat") fFlatNotGauss = kTRUE;
505  else fFlatNotGauss = kFALSE;
506 
507  Bool_t newFormat = kFALSE;
508 
509  void* inpnode = NULL;
510 
511  inpnode = gTools().GetChild(trfnode, "Selection"); // new xml format
512  if( inpnode!=NULL )
513  newFormat = kTRUE; // new xml format
514 
515  void* varnode = NULL;
516  if( newFormat ){
517  // ------------- new format --------------------
518  // read input
519  VariableTransformBase::ReadFromXML( inpnode );
520 
521  varnode = gTools().GetNextChild(inpnode);
522  }else
523  varnode = gTools().GetChild(trfnode);
524 
525  // Read the cumulative distribution
526 
527  TString varname, histname, classname;
528  UInt_t ivar;
529  while(varnode) {
530  if( gTools().HasAttr(varnode,"Name") )
531  gTools().ReadAttr(varnode, "Name", varname);
532  gTools().ReadAttr(varnode, "VarIndex", ivar);
533 
534  void* clsnode = gTools().GetChild( varnode);
535 
536  while(clsnode) {
537  void* pdfnode = gTools().GetChild( clsnode);
538  PDF* pdfToRead = new PDF(TString("tempName"),kFALSE);
539  pdfToRead->ReadXML(pdfnode); // pdfnode
540  // push_back PDF
541  fCumulativePDF.resize( ivar+1 );
542  fCumulativePDF[ivar].push_back(pdfToRead);
543  clsnode = gTools().GetNextChild(clsnode);
544  }
545 
546  varnode = gTools().GetNextChild(varnode);
547  }
548  SetCreated();
549 }
550 
551 ////////////////////////////////////////////////////////////////////////////////
552 /// Read the cumulative distribution
553 
554 void TMVA::VariableGaussTransform::ReadTransformationFromStream( std::istream& istr, const TString& classname)
555 {
556  Bool_t addDirStatus = TH1::AddDirectoryStatus();
557  TH1::AddDirectory(0); // this avoids the binding of the hists in TMVA::PDF to the current ROOT file
558  char buf[512];
559  istr.getline(buf,512);
560 
561  TString strvar, dummy;
562 
563  while (!(buf[0]=='#'&& buf[1]=='#')) { // if line starts with ## return
564  char* p = buf;
565  while (*p==' ' || *p=='\t') p++; // 'remove' leading whitespace
566  if (*p=='#' || *p=='\0') {
567  istr.getline(buf,512);
568  continue; // if comment or empty line, read the next line
569  }
570  std::stringstream sstr(buf);
571  sstr >> strvar;
572 
573  if (strvar=="CumulativeHistogram") {
574  UInt_t type(0), ivar(0);
575  TString devnullS(""),hname("");
576  Int_t nbins(0);
577 
578  // coverity[tainted_data_argument]
579  sstr >> type >> ivar >> hname >> nbins >> fElementsperbin;
580 
581  Float_t *Binnings = new Float_t[nbins+1];
582  Float_t val;
583  istr >> devnullS; // read the line "BinBoundaries" ..
584  for (Int_t ibin=0; ibin<nbins+1; ibin++) {
585  istr >> val;
586  Binnings[ibin]=val;
587  }
588 
589  if(ivar>=fCumulativeDist.size()) fCumulativeDist.resize(ivar+1);
590  if(type>=fCumulativeDist[ivar].size()) fCumulativeDist[ivar].resize(type+1);
591 
592  TH1F * histToRead = fCumulativeDist[ivar][type];
593  if ( histToRead !=0 ) delete histToRead;
594  // recreate the cumulative histogram to be filled with the values read
595  histToRead = new TH1F( hname, hname, nbins, Binnings );
596  histToRead->SetDirectory(0);
597  fCumulativeDist[ivar][type]=histToRead;
598 
599  istr >> devnullS; // read the line "BinContent" ..
600  for (Int_t ibin=0; ibin<nbins; ibin++) {
601  istr >> val;
602  histToRead->SetBinContent(ibin+1,val);
603  }
604 
605  PDF* pdf = new PDF(hname,histToRead,PDF::kSpline0, 0, 0, kFALSE, kFALSE);
606  // push_back PDF
607  fCumulativePDF.resize(ivar+1);
608  fCumulativePDF[ivar].resize(type+1);
609  fCumulativePDF[ivar][type] = pdf;
610  delete [] Binnings;
611  }
612 
613  // if (strvar=="TransformToFlatInsetadOfGauss=") { // don't correct this spelling mistake
614  if (strvar=="Uniform") { // don't correct this spelling mistake
615  sstr >> fFlatNotGauss;
616  istr.getline(buf,512);
617  break;
618  }
619 
620  istr.getline(buf,512); // reading the next line
621  }
622  TH1::AddDirectory(addDirStatus);
623 
624  UInt_t classIdx=(classname=="signal")?0:1;
625  for(UInt_t ivar=0; ivar<fCumulativePDF.size(); ++ivar) {
626  PDF* src = fCumulativePDF[ivar][classIdx];
627  fCumulativePDF[ivar].push_back(new PDF(src->GetName(),fCumulativeDist[ivar][classIdx],PDF::kSpline0, 0, 0, kFALSE, kFALSE) );
628  }
629 
630  SetTMVAVersion(TMVA_VERSION(3,9,7));
631 
632  SetCreated();
633 }
634 
635 ////////////////////////////////////////////////////////////////////////////////
636 
637 Double_t TMVA::VariableGaussTransform::OldCumulant(Float_t x, TH1* h ) const {
638 
639  Int_t bin = h->FindBin(x);
640  bin = TMath::Max(bin,1);
641  bin = TMath::Min(bin,h->GetNbinsX());
642 
643  Double_t cumulant;
644  Double_t x0, x1, y0, y1;
645  Double_t total = h->GetNbinsX()*fElementsperbin;
646  Double_t supmin = 0.5/total;
647 
648  x0 = h->GetBinLowEdge(TMath::Max(bin,1));
649  x1 = h->GetBinLowEdge(TMath::Min(bin,h->GetNbinsX())+1);
650 
651  y0 = h->GetBinContent(TMath::Max(bin-1,0)); // Y0 = F(x0); Y0 >= 0
652  y1 = h->GetBinContent(TMath::Min(bin, h->GetNbinsX()+1)); // Y1 = F(x1); Y1 <= 1
653 
654  if (bin == 0) {
655  y0 = supmin;
656  y1 = supmin;
657  }
658  if (bin == 1) {
659  y0 = supmin;
660  }
661  if (bin > h->GetNbinsX()) {
662  y0 = 1.-supmin;
663  y1 = 1.-supmin;
664  }
665  if (bin == h->GetNbinsX()) {
666  y1 = 1.-supmin;
667  }
668 
669  if (x0 == x1) {
670  cumulant = y1;
671  } else {
672  cumulant = y0 + (y1-y0)*(x-x0)/(x1-x0);
673  }
674 
675  if (x <= h->GetBinLowEdge(1)){
676  cumulant = supmin;
677  }
678  if (x >= h->GetBinLowEdge(h->GetNbinsX()+1)){
679  cumulant = 1-supmin;
680  }
681  return cumulant;
682 }
683 
684 ////////////////////////////////////////////////////////////////////////////////
685 /// prints the transformation
686 
687 void TMVA::VariableGaussTransform::PrintTransformation( std::ostream& )
688 {
689  Int_t cls = 0;
690  Log() << kINFO << "I do not know yet how to print this... look in the weight file " << cls << ":" << Endl;
691  cls++;
692 }
693 
694 ////////////////////////////////////////////////////////////////////////////////
695 /// creates the transformation function
696 ///
697 
698 void TMVA::VariableGaussTransform::MakeFunction( std::ostream& fout, const TString& fcncName,
699  Int_t part, UInt_t trCounter, Int_t )
700 {
701  const UInt_t nvar = fGet.size();
702  UInt_t numDist = GetNClasses() + 1;
703  Int_t nBins = -1;
704  for (UInt_t icls=0; icls<numDist; icls++) {
705  for (UInt_t ivar=0; ivar<nvar; ivar++) {
706  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
707  if (nbin > nBins) nBins=nbin;
708  }
709  }
710 
711  // creates the gauss transformation function
712  if (part==1) {
713  fout << std::endl;
714  fout << " int nvar;" << std::endl;
715  fout << std::endl;
716  // declare variables
717  fout << " double cumulativeDist["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
718  fout << " double X["<<nvar<<"]["<<numDist<<"]["<<nBins+1<<"];"<<std::endl;
719  fout << " double xMin["<<nvar<<"]["<<numDist<<"];"<<std::endl;
720  fout << " double xMax["<<nvar<<"]["<<numDist<<"];"<<std::endl;
721  fout << " int nbins["<<nvar<<"]["<<numDist<<"];"<<std::endl;
722  }
723  if (part==2) {
724  fout << std::endl;
725  fout << "#include \"math.h\"" << std::endl;
726  fout << std::endl;
727  fout << "//_______________________________________________________________________" << std::endl;
728  fout << "inline void " << fcncName << "::InitTransform_"<<trCounter<<"()" << std::endl;
729  fout << "{" << std::endl;
730  fout << " // Gauss/Uniform transformation, initialisation" << std::endl;
731  fout << " nvar=" << nvar << ";" << std::endl;
732  for (UInt_t icls=0; icls<numDist; icls++) {
733  for (UInt_t ivar=0; ivar<nvar; ivar++) {
734  Int_t nbin=(fCumulativePDF[ivar][icls])->GetGraph()->GetN();
735  fout << " nbins["<<ivar<<"]["<<icls<<"]="<<nbin<<";"<<std::endl;
736  }
737  }
738 
739  // fill meat here
740  // loop over nvar , cls, loop over nBins
741  // fill cumulativeDist with fCumulativePDF[ivar][cls])->GetValue(vec(ivar)
742  for (UInt_t icls=0; icls<numDist; icls++) {
743  for (UInt_t ivar=0; ivar<nvar; ivar++) {
744  // Int_t idx = 0;
745  try{
746  // idx = fGet.at(ivar).second;
747  Char_t type = fGet.at(ivar).first;
748  if( type != 'v' ){
749  Log() << kWARNING << "MakeClass for the Gauss transformation works only for the transformation of variables. The transformation of targets/spectators is not implemented." << Endl;
750  }
751  }catch( std::out_of_range &){
752  Log() << kWARNING << "MakeClass for the Gauss transformation searched for a non existing variable index (" << ivar << ")" << Endl;
753  }
754 
755  // Double_t xmn=Variables()[idx].GetMin();
756  // Double_t xmx=Variables()[idx].GetMax();
757  Double_t xmn = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[0];
758  Double_t xmx = (fCumulativePDF[ivar][icls])->GetGraph()->GetX()[(fCumulativePDF[ivar][icls])->GetGraph()->GetN()-1];
759 
760  fout << " xMin["<<ivar<<"]["<<icls<<"]="<< gTools().StringFromDouble(xmn)<<";"<<std::endl;
761  fout << " xMax["<<ivar<<"]["<<icls<<"]="<<gTools().StringFromDouble(xmx)<<";"<<std::endl;
762  for (Int_t ibin=0; ibin<(fCumulativePDF[ivar][icls])->GetGraph()->GetN(); ibin++) {
763  fout << " cumulativeDist[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetY()[ibin])<< ";"<<std::endl;
764  fout << " X[" << ivar << "]["<< icls<< "]["<<ibin<<"]="<< gTools().StringFromDouble((fCumulativePDF[ivar][icls])->GetGraph()->GetX()[ibin])<< ";"<<std::endl;
765 
766  }
767  }
768  }
769  fout << "}" << std::endl;
770  fout << std::endl;
771  fout << "//_______________________________________________________________________" << std::endl;
772  fout << "inline void " << fcncName << "::Transform_"<<trCounter<<"( std::vector<double>& iv, int clsIn) const" << std::endl;
773  fout << "{" << std::endl;
774  fout << " // Gauss/Uniform transformation" << std::endl;
775  fout << " int cls=clsIn;" << std::endl;
776  fout << " if (cls < 0 || cls > "<<GetNClasses()<<") {"<< std::endl;
777  fout << " if ("<<GetNClasses()<<" > 1 ) cls = "<<GetNClasses()<<";"<< std::endl;
778  fout << " else cls = "<<(fCumulativePDF[0].size()==1?0:2)<<";"<< std::endl;
779  fout << " }"<< std::endl;
780 
781  fout << " // copy the variables which are going to be transformed "<< std::endl;
782  VariableTransformBase::MakeFunction(fout, fcncName, 0, trCounter, 0 );
783  fout << " static std::vector<double> dv; "<< std::endl;
784  fout << " dv.resize(nvar); "<< std::endl;
785  fout << " for (int ivar=0; ivar<nvar; ivar++) dv[ivar] = iv[indicesGet.at(ivar)]; "<< std::endl;
786  fout << " "<< std::endl;
787  fout << " bool FlatNotGauss = "<< (fFlatNotGauss? "true": "false") <<"; "<< std::endl;
788  fout << " double cumulant; "<< std::endl;
789  fout << " //const int nvar = "<<nvar<<"; "<< std::endl;
790  fout << " for (int ivar=0; ivar<nvar; ivar++) { "<< std::endl;
791  fout << " int nbin = nbins[ivar][cls]; "<< std::endl;
792  fout << " int ibin=0; "<< std::endl;
793  fout << " while (dv[ivar] > X[ivar][cls][ibin]) ibin++; "<< std::endl;
794  fout << " "<< std::endl;
795  fout << " if (ibin<0) { ibin=0;} "<< std::endl;
796  fout << " if (ibin>=nbin) { ibin=nbin-1;} "<< std::endl;
797  fout << " int nextbin = ibin; "<< std::endl;
798  fout << " if ((dv[ivar] > X[ivar][cls][ibin] && ibin !=nbin-1) || ibin==0) "<< std::endl;
799  fout << " nextbin++; "<< std::endl;
800  fout << " else "<< std::endl;
801  fout << " nextbin--; "<< std::endl;
802  fout << " "<< std::endl;
803  fout << " double dx = X[ivar][cls][ibin]- X[ivar][cls][nextbin]; "<< std::endl;
804  fout << " double dy = cumulativeDist[ivar][cls][ibin] - cumulativeDist[ivar][cls][nextbin]; "<< std::endl;
805  fout << " cumulant = cumulativeDist[ivar][cls][ibin] + (dv[ivar] - X[ivar][cls][ibin])* dy/dx;"<< std::endl;
806  fout << " "<< std::endl;
807  fout << " "<< std::endl;
808  fout << " if (cumulant>1.-10e-10) cumulant = 1.-10e-10; "<< std::endl;
809  fout << " if (cumulant<10e-10) cumulant = 10e-10; "<< std::endl;
810  fout << " if (FlatNotGauss) dv[ivar] = cumulant; "<< std::endl;
811  fout << " else { "<< std::endl;
812  fout << " double maxErfInvArgRange = 0.99999999; "<< std::endl;
813  fout << " double arg = 2.0*cumulant - 1.0; "<< std::endl;
814  fout << " if (arg > maxErfInvArgRange) arg= maxErfInvArgRange; "<< std::endl;
815  fout << " if (arg < -maxErfInvArgRange) arg=-maxErfInvArgRange; "<< std::endl;
816  fout << " double inverf=0., stp=1. ; "<< std::endl;
817  fout << " while (stp >1.e-10){; "<< std::endl;
818  fout << " if (erf(inverf)>arg) inverf -=stp ; "<< std::endl;
819  fout << " else if (erf(inverf)<=arg && erf(inverf+stp)>=arg) stp=stp/5. ; "<< std::endl;
820  fout << " else inverf += stp; "<< std::endl;
821  fout << " } ; "<< std::endl;
822  fout << " //dv[ivar] = 1.414213562*TMath::ErfInverse(arg); "<< std::endl;
823  fout << " dv[ivar] = 1.414213562* inverf; "<< std::endl;
824  fout << " } "<< std::endl;
825  fout << " } "<< std::endl;
826  fout << " // copy the transformed variables back "<< std::endl;
827  fout << " for (int ivar=0; ivar<nvar; ivar++) iv[indicesPut.at(ivar)] = dv[ivar]; "<< std::endl;
828  fout << "} "<< std::endl;
829  }
830 }