Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RuleEnsemble.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleEnsemble *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * A class generating an ensemble of rules *
12  * Input: a forest of decision trees *
13  * Output: an ensemble of rules *
14  * *
15  * Authors (alphabetical): *
16  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, GER *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * Iowa State U. *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 /*! \class TMVA::RuleEnsemble
30 \ingroup TMVA
31 
32 */
33 #include "TMVA/RuleEnsemble.h"
34 
35 #include "TMVA/DataSetInfo.h"
36 #include "TMVA/DecisionTree.h"
37 #include "TMVA/Event.h"
38 #include "TMVA/MethodBase.h"
39 #include "TMVA/MethodRuleFit.h"
40 #include "TMVA/MsgLogger.h"
41 #include "TMVA/RuleFit.h"
42 #include "TMVA/Tools.h"
43 #include "TMVA/Types.h"
44 
45 #include "TRandom3.h"
46 #include "TH1F.h"
47 
48 #include <algorithm>
49 #include <cstdlib>
50 #include <iomanip>
51 #include <list>
52 
////////////////////////////////////////////////////////////////////////////////
/// Standard constructor.
///
/// Sets every member to its default value and then calls Initialize(),
/// which caches the owning RuleFit object and sizes the per-variable vectors.
///
/// \param[in] rf  the RuleFit instance this ensemble belongs to; stored
///                (not owned) via Initialize().

TMVA::RuleEnsemble::RuleEnsemble( RuleFit *rf )
   : fLearningModel   ( kFull )
   , fImportanceCut   ( 0 )
   , fLinQuantile     ( 0.025 ) // default quantile for killing outliers in linear terms
   , fOffset          ( 0 )
   , fAverageSupport  ( 0.8 )
   , fAverageRuleSigma( 0.4 )   // default value - used if only linear model is chosen
   , fRuleFSig        ( 0 )
   , fRuleNCave       ( 0 )
   , fRuleNCsig       ( 0 )
   , fRuleMinDist     ( 1e-3 )  // closest allowed 'distance' between two rules
   , fNRulesGenerated ( 0 )
   , fEvent           ( 0 )     // no current event cached yet
   , fEventCacheOK    ( true )
   , fRuleMapOK       ( true )
   , fRuleMapInd0     ( 0 )
   , fRuleMapInd1     ( 0 )
   , fRuleMapEvents   ( 0 )
   , fLogger( new MsgLogger("RuleFit") ) // owned; released in the destructor
{
   Initialize( rf );
}
78 
////////////////////////////////////////////////////////////////////////////////
/// Copy constructor.
///
/// Only a few members are set in the initializer list; the actual copying of
/// the model content is delegated to Copy(). A fresh MsgLogger is allocated
/// so the copy never shares the logger with `other`.

TMVA::RuleEnsemble::RuleEnsemble( const RuleEnsemble& other )
   : fAverageSupport ( 1 )
   , fEvent(0)
   , fRuleMapEvents(0)
   , fRuleFit(0)
   , fLogger( new MsgLogger("RuleFit") )
{
   Copy( other );
}
91 
////////////////////////////////////////////////////////////////////////////////
/// Default constructor.
///
/// Initializes all members to the same defaults as the RuleFit* constructor,
/// plus fImportanceRef, but does NOT call Initialize() - fRuleFit stays null
/// until the ensemble is attached to a RuleFit object.

TMVA::RuleEnsemble::RuleEnsemble()
   : fLearningModel   ( kFull )
   , fImportanceCut   ( 0 )
   , fLinQuantile     ( 0.025 ) // default quantile for killing outliers in linear terms
   , fOffset          ( 0 )
   , fImportanceRef   ( 1.0 )
   , fAverageSupport  ( 0.8 )
   , fAverageRuleSigma( 0.4 )   // default value - used if only linear model is chosen
   , fRuleFSig        ( 0 )
   , fRuleNCave       ( 0 )
   , fRuleNCsig       ( 0 )
   , fRuleMinDist     ( 1e-3 )  // closest allowed 'distance' between two rules
   , fNRulesGenerated ( 0 )
   , fEvent           ( 0 )
   , fEventCacheOK    ( true )
   , fRuleMapOK       ( true )
   , fRuleMapInd0     ( 0 )
   , fRuleMapInd1     ( 0 )
   , fRuleMapEvents   ( 0 )
   , fRuleFit         ( 0 )
   , fLogger( new MsgLogger("RuleFit") ) // owned; released in the destructor
{
}
118 
119 ////////////////////////////////////////////////////////////////////////////////
120 /// destructor
121 
122 TMVA::RuleEnsemble::~RuleEnsemble()
123 {
124  for ( std::vector<Rule *>::iterator itrRule = fRules.begin(); itrRule != fRules.end(); ++itrRule ) {
125  delete *itrRule;
126  }
127  // NOTE: Should not delete the histos fLinPDFB/S since they are delete elsewhere
128  delete fLogger;
129 }
130 
131 ////////////////////////////////////////////////////////////////////////////////
132 /// Initializes all member variables with default values
133 
134 void TMVA::RuleEnsemble::Initialize( const RuleFit *rf )
135 {
136  SetAverageRuleSigma(0.4); // default value - used if only linear model is chosen
137  fRuleFit = rf;
138  UInt_t nvars = GetMethodBase()->GetNvar();
139  fVarImportance.clear();
140  fLinPDFB.clear();
141  fLinPDFS.clear();
142  //
143  fVarImportance.resize( nvars,0.0 );
144  fLinPDFB.resize( nvars,0 );
145  fLinPDFS.resize( nvars,0 );
146  fImportanceRef = 1.0;
147  for (UInt_t i=0; i<nvars; i++) { // a priori all linear terms are equally valid
148  fLinTermOK.push_back(kTRUE);
149  }
150 }
151 
152 ////////////////////////////////////////////////////////////////////////////////
153 
154 void TMVA::RuleEnsemble::SetMsgType( EMsgType t ) {
155  fLogger->SetMinType(t);
156 }
157 
158 ////////////////////////////////////////////////////////////////////////////////
159 ///
160 /// Get a pointer to the original MethodRuleFit.
161 
162 const TMVA::MethodRuleFit* TMVA::RuleEnsemble::GetMethodRuleFit() const
163 {
164  return ( fRuleFit==0 ? 0:fRuleFit->GetMethodRuleFit());
165 }
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 ///
169 /// Get a pointer to the original MethodRuleFit.
170 
171 const TMVA::MethodBase* TMVA::RuleEnsemble::GetMethodBase() const
172 {
173  return ( fRuleFit==0 ? 0:fRuleFit->GetMethodBase());
174 }
175 
////////////////////////////////////////////////////////////////////////////////
/// create model
///
/// Builds the complete rule-ensemble model in a fixed order:
/// rules from the decision-tree forest, then the linear terms, the
/// event-to-rule map, the per-rule support, the rule statistics and
/// finally a summary printout of the generation step.

void TMVA::RuleEnsemble::MakeModel()
{
   MakeRules( fRuleFit->GetForest() ); // extract one rule per node path of each tree

   MakeLinearTerms();                  // no-op unless DoLinear()

   MakeRuleMap();                      // cache which rules fire for each training event

   CalcRuleSupport();                  // support, norm and S/(S+B) per rule

   RuleStatistics();                   // average number of cuts per rule and its spread

   PrintRuleGen();                     // log the generation summary
}
193 
////////////////////////////////////////////////////////////////////////////////
///
/// Calculates Sum(a_i^2), i=1..N (NOTE do not include a0)
///
/// NOTE(review): despite the name "radius", no square root is taken - the
/// value returned is the *squared* coefficient norm. The original doc claimed
/// sqrt(Sum(a_i^2)); the comment has been corrected to match the code rather
/// than changing the return value, since callers may depend on it.

Double_t TMVA::RuleEnsemble::CoefficientRadius()
{
   Int_t ncoeffs = fRules.size();
   if (ncoeffs<1) return 0;
   //
   Double_t sum2=0;
   Double_t val;
   for (Int_t i=0; i<ncoeffs; i++) {
      val = fRules[i]->GetCoefficient();
      sum2 += val*val;
   }
   return sum2;
}
211 
212 ////////////////////////////////////////////////////////////////////////////////
213 /// reset all rule coefficients
214 
215 void TMVA::RuleEnsemble::ResetCoefficients()
216 {
217  fOffset = 0.0;
218  UInt_t nrules = fRules.size();
219  for (UInt_t i=0; i<nrules; i++) {
220  fRules[i]->SetCoefficient(0.0);
221  }
222 }
223 
224 ////////////////////////////////////////////////////////////////////////////////
225 /// set all rule coefficients
226 
227 void TMVA::RuleEnsemble::SetCoefficients( const std::vector< Double_t > & v )
228 {
229  UInt_t nrules = fRules.size();
230  if (v.size()!=nrules) {
231  Log() << kFATAL << "<SetCoefficients> - BUG TRAP - input vector wrong size! It is = " << v.size()
232  << " when it should be = " << nrules << Endl;
233  }
234  for (UInt_t i=0; i<nrules; i++) {
235  fRules[i]->SetCoefficient(v[i]);
236  }
237 }
238 
239 ////////////////////////////////////////////////////////////////////////////////
240 /// Retrieve all rule coefficients
241 
242 void TMVA::RuleEnsemble::GetCoefficients( std::vector< Double_t > & v )
243 {
244  UInt_t nrules = fRules.size();
245  v.resize(nrules);
246  if (nrules==0) return;
247  //
248  for (UInt_t i=0; i<nrules; i++) {
249  v[i] = (fRules[i]->GetCoefficient());
250  }
251 }
252 
253 ////////////////////////////////////////////////////////////////////////////////
254 /// get list of training events from the rule fitter
255 
256 const std::vector<const TMVA::Event*>* TMVA::RuleEnsemble::GetTrainingEvents() const
257 {
258  return &(fRuleFit->GetTrainingEvents());
259 }
260 
261 ////////////////////////////////////////////////////////////////////////////////
262 /// get the training event from the rule fitter
263 
264 const TMVA::Event * TMVA::RuleEnsemble::GetTrainingEvent(UInt_t i) const
265 {
266  return fRuleFit->GetTrainingEvent(i);
267 }
268 
////////////////////////////////////////////////////////////////////////////////
/// remove rules that behave similar
///
/// Compares every pair of (not yet removed) rules with Rule::Equal using the
/// fRuleMinDist threshold; for each equal pair one of the two rules is picked
/// at random (via gRandom) and marked for removal. Marked rules are then
/// erased and deleted in a second pass.

void TMVA::RuleEnsemble::RemoveSimilarRules()
{
   Log() << kVERBOSE << "Removing similar rules; distance = " << fRuleMinDist << Endl;

   UInt_t nrulesIn = fRules.size();
   TMVA::Rule *first, *second;
   // Char_t instead of Bool_t: avoids the std::vector<bool> proxy specialization
   std::vector< Char_t > removeMe( nrulesIn,false );  // <--- stores boolean

   Int_t nrem = 0;
   Int_t remind=-1;
   Double_t r;

   for (UInt_t i=0; i<nrulesIn; i++) {
      if (!removeMe[i]) {
         first = fRules[i];
         for (UInt_t k=i+1; k<nrulesIn; k++) {
            if (!removeMe[k]) {
               second = fRules[k];
               Bool_t equal = first->Equal(*second,kTRUE,fRuleMinDist);
               if (equal) {
                  r = gRandom->Rndm();
                  remind = (r>0.5 ? k:i); // randomly select rule
               }
               else {
                  remind = -1;
               }

               if (remind>-1) {
                  if (!removeMe[remind]) {
                     removeMe[remind] = true;
                     nrem++;
                  }
               }
            }
         }
      }
   }
   // second pass: erase marked rules; `ind` tracks the position in the
   // shrinking fRules vector while `i` walks the original index space
   UInt_t ind = 0;
   Rule *theRule;
   for (UInt_t i=0; i<nrulesIn; i++) {
      if (removeMe[i]) {
         theRule = fRules[ind];
         fRules.erase( fRules.begin() + ind );
         delete theRule;
         ind--;
      }
      ind++;
   }
   UInt_t nrulesOut = fRules.size();
   Log() << kVERBOSE << "Removed " << nrulesIn - nrulesOut << " out of " << nrulesIn << " rules" << Endl;
}
323 
////////////////////////////////////////////////////////////////////////////////
/// cleanup rules
///
/// Erases (and deletes) every rule whose relative importance is below
/// fImportanceCut. A cut of zero or less disables the cleanup.

void TMVA::RuleEnsemble::CleanupRules()
{
   UInt_t nrules = fRules.size();
   if (nrules==0) return;
   Log() << kVERBOSE << "Removing rules with relative importance < " << fImportanceCut << Endl;
   if (fImportanceCut<=0) return;
   //
   // Mark rules to be removed
   //
   Rule *therule;
   // `ind` is the index into the shrinking vector; after the loop it equals
   // the number of rules kept, hence nrules-ind below is the number removed
   Int_t ind=0;
   for (UInt_t i=0; i<nrules; i++) {
      if (fRules[ind]->GetRelImportance()<fImportanceCut) {
         therule = fRules[ind];
         fRules.erase( fRules.begin() + ind );
         delete therule;
         ind--;
      }
      ind++;
   }
   Log() << kINFO << "Removed " << nrules-ind << " out of a total of " << nrules
         << " rules with importance < " << fImportanceCut << Endl;
}
350 
351 ////////////////////////////////////////////////////////////////////////////////
352 /// cleanup linear model
353 
354 void TMVA::RuleEnsemble::CleanupLinear()
355 {
356  UInt_t nlin = fLinNorm.size();
357  if (nlin==0) return;
358  Log() << kVERBOSE << "Removing linear terms with relative importance < " << fImportanceCut << Endl;
359  //
360  fLinTermOK.clear();
361  for (UInt_t i=0; i<nlin; i++) {
362  fLinTermOK.push_back( (fLinImportance[i]/fImportanceRef > fImportanceCut) );
363  }
364 }
365 
////////////////////////////////////////////////////////////////////////////////
/// calculate the support for all rules
///
/// For every rule the (weighted) fraction of training events that pass the
/// rule is computed: the support s, its norm t = sqrt(s*(1-s)), the
/// signal fraction S/(S+B) of the passing events and their weighted count.
/// Also updates fAverageSupport and fAverageRuleSigma.

void TMVA::RuleEnsemble::CalcRuleSupport()
{
   Log() << kVERBOSE << "Evaluating Rule support" << Endl;
   Double_t s,t,stot,ttot,ssb;
   Double_t ssig, sbkg, ssum;
   Int_t indrule=0;
   stot = 0;
   ttot = 0;
   // reset to default values
   SetAverageRuleSigma(0.4);
   const std::vector<const Event *> *events = GetTrainingEvents();
   Double_t nrules = static_cast<Double_t>(fRules.size());
   Double_t ew;
   //
   if ((nrules>0) && (events->size()>0)) {
      for ( std::vector< Rule * >::iterator itrRule=fRules.begin(); itrRule!=fRules.end(); ++itrRule ) {
         s=0.0;
         ssig=0.0;
         sbkg=0.0;
         // accumulate the event weight of all events passing this rule,
         // split into signal and background
         for ( std::vector<const Event * >::const_iterator itrEvent=events->begin(); itrEvent!=events->end(); ++itrEvent ) {
            if ((*itrRule)->EvalEvent( *(*itrEvent) )) {
               ew = (*itrEvent)->GetWeight();
               s += ew;
               if (GetMethodRuleFit()->DataInfo().IsSignal(*itrEvent)) ssig += ew;
               else sbkg += ew;
            }
         }
         //
         s = s/fRuleFit->GetNEveEff();  // normalise to the effective number of events
         t = s*(1.0-s);
         t = (t<0 ? 0:sqrt(t));         // guard against tiny negative rounding
         stot += s;
         ttot += t;
         ssum = ssig+sbkg;
         ssb = (ssum>0 ? Double_t(ssig)/Double_t(ssig+sbkg) : 0.0 );
         (*itrRule)->SetSupport(s);
         (*itrRule)->SetNorm(t);
         (*itrRule)->SetSSB( ssb );
         (*itrRule)->SetSSBNeve(Double_t(ssig+sbkg));
         indrule++;
      }
      fAverageSupport = stot/nrules;
      fAverageRuleSigma = TMath::Sqrt(fAverageSupport*(1.0-fAverageSupport));
      Log() << kVERBOSE << "Standard deviation of support = " << fAverageRuleSigma << Endl;
      Log() << kVERBOSE << "Average rule support = " << fAverageSupport << Endl;
   }
}
416 
417 ////////////////////////////////////////////////////////////////////////////////
418 /// calculate the importance of each rule
419 
420 void TMVA::RuleEnsemble::CalcImportance()
421 {
422  Double_t maxRuleImp = CalcRuleImportance();
423  Double_t maxLinImp = CalcLinImportance();
424  Double_t maxImp = (maxRuleImp>maxLinImp ? maxRuleImp : maxLinImp);
425  SetImportanceRef( maxImp );
426 }
427 
428 ////////////////////////////////////////////////////////////////////////////////
429 /// set reference importance
430 
431 void TMVA::RuleEnsemble::SetImportanceRef(Double_t impref)
432 {
433  for ( UInt_t i=0; i<fRules.size(); i++ ) {
434  fRules[i]->SetImportanceRef(impref);
435  }
436  fImportanceRef = impref;
437 }
438 ////////////////////////////////////////////////////////////////////////////////
439 /// calculate importance of each rule
440 
441 Double_t TMVA::RuleEnsemble::CalcRuleImportance()
442 {
443  Double_t maxImp=-1.0;
444  Double_t imp;
445  Int_t nrules = fRules.size();
446  for ( int i=0; i<nrules; i++ ) {
447  fRules[i]->CalcImportance();
448  imp = fRules[i]->GetImportance();
449  if (imp>maxImp) maxImp = imp;
450  }
451  for ( Int_t i=0; i<nrules; i++ ) {
452  fRules[i]->SetImportanceRef(maxImp);
453  }
454 
455  return maxImp;
456 }
457 
458 ////////////////////////////////////////////////////////////////////////////////
459 /// calculate the linear importance for each rule
460 
461 Double_t TMVA::RuleEnsemble::CalcLinImportance()
462 {
463  Double_t maxImp=-1.0;
464  UInt_t nvars = fLinCoefficients.size();
465  fLinImportance.resize(nvars,0.0);
466  if (!DoLinear()) return maxImp;
467  //
468  // The linear importance is:
469  // I = |b_x|*sigma(x)
470  // Note that the coefficients in fLinCoefficients are for the normalized x
471  // => b'_x * x' = b'_x * sigma(r)*x/sigma(x)
472  // => b_x = b'_x*sigma(r)/sigma(x)
473  // => I = |b'_x|*sigma(r)
474  //
475  Double_t imp;
476  for ( UInt_t i=0; i<nvars; i++ ) {
477  imp = fAverageRuleSigma*TMath::Abs(fLinCoefficients[i]);
478  fLinImportance[i] = imp;
479  if (imp>maxImp) maxImp = imp;
480  }
481  return maxImp;
482 }
483 
////////////////////////////////////////////////////////////////////////////////
/// Calculates variable importance using eq (35) in RuleFit paper by Friedman et.al
///
/// Each rule contributes its importance, split evenly among the variables it
/// uses; enabled linear terms add their own importance. The result is then
/// normalized so the strongest variable has importance 1.

void TMVA::RuleEnsemble::CalcVarImportance()
{
   Log() << kVERBOSE << "Compute variable importance" << Endl;
   Double_t rimp;
   UInt_t nrules = fRules.size();
   if (GetMethodBase()==0) Log() << kFATAL << "RuleEnsemble::CalcVarImportance() - should not be here!" << Endl;
   UInt_t nvars = GetMethodBase()->GetNvar();
   UInt_t nvarsUsed;
   Double_t rimpN;
   fVarImportance.resize(nvars,0);
   // rules
   if (DoRules()) {
      for ( UInt_t ind=0; ind<nrules; ind++ ) {
         rimp = fRules[ind]->GetImportance();
         nvarsUsed = fRules[ind]->GetNumVarsUsed();
         if (nvarsUsed<1)
            Log() << kFATAL << "<CalcVarImportance> Variables for importance calc!!!??? A BUG!" << Endl;
         // importance shared equally among the variables used by the rule
         rimpN = (nvarsUsed > 0 ? rimp/nvarsUsed:0.0);
         for ( UInt_t iv=0; iv<nvars; iv++ ) {
            if (fRules[ind]->ContainsVariable(iv)) {
               fVarImportance[iv] += rimpN;
            }
         }
      }
   }
   // linear terms
   if (DoLinear()) {
      for ( UInt_t iv=0; iv<fLinTermOK.size(); iv++ ) {
         if (fLinTermOK[iv]) fVarImportance[iv] += fLinImportance[iv];
      }
   }
   //
   // Make variable importance relative the strongest variable
   //
   Double_t maximp = 0.0;
   for ( UInt_t iv=0; iv<nvars; iv++ ) {
      if ( fVarImportance[iv] > maximp ) maximp = fVarImportance[iv];
   }
   if (maximp>0) {
      for ( UInt_t iv=0; iv<nvars; iv++ ) {
         fVarImportance[iv] *= 1.0/maximp;
      }
   }
}
531 
532 ////////////////////////////////////////////////////////////////////////////////
533 /// set rules
534 ///
535 /// first clear all
536 
537 void TMVA::RuleEnsemble::SetRules( const std::vector<Rule *> & rules )
538 {
539  DeleteRules();
540  //
541  fRules.resize(rules.size());
542  for (UInt_t i=0; i<fRules.size(); i++) {
543  fRules[i] = rules[i];
544  }
545  fEventCacheOK = kFALSE;
546 }
547 
////////////////////////////////////////////////////////////////////////////////
/// Makes rules from the given decision tree.
/// First node in all rules is ALWAYS the root node.
///
/// For each tree the generated rules are appended to fRules; simple end-node
/// statistics (mean and spread across trees) are logged, a consistency check
/// compares the generated rule count to the expected count, and finally
/// similar rules are removed and all coefficients reset.

void TMVA::RuleEnsemble::MakeRules( const std::vector< const DecisionTree *> & forest )
{
   fRules.clear();
   if (!DoRules()) return;
   //
   Int_t nrulesCheck=0;
   Int_t nrules;
   Int_t nendn;
   Double_t sumnendn=0;
   Double_t sumn2=0;
   //
   // UInt_t prevs;
   UInt_t ntrees = forest.size();
   for ( UInt_t ind=0; ind<ntrees; ind++ ) {
      // prevs = fRules.size();
      MakeRulesFromTree( forest[ind] );
      nrules = CalcNRules( forest[ind] );
      nendn = (nrules/2) + 1;  // number of end nodes in a binary tree with nrules rules
      sumnendn += nendn;
      sumn2 += nendn*nendn;
      nrulesCheck += nrules;
   }
   Double_t nmean = (ntrees>0) ? sumnendn/ntrees : 0;
   Double_t nsigm = TMath::Sqrt( gTools().ComputeVariance(sumn2,sumnendn,ntrees) );
   Double_t ndev = 2.0*(nmean-2.0-nsigm)/(nmean-2.0+nsigm);
   //
   Log() << kVERBOSE << "Average number of end nodes per tree = " << nmean << Endl;
   if (ntrees>1) Log() << kVERBOSE << "sigma of ditto ( ~= mean-2 ?) = "
                       << nsigm
                       << Endl;
   Log() << kVERBOSE << "Deviation from exponential model = " << ndev << Endl;
   Log() << kVERBOSE << "Corresponds to L (eq. 13, RuleFit ppr) = " << nmean << Endl;
   // a BUG trap
   if (nrulesCheck != static_cast<Int_t>(fRules.size())) {
      Log() << kFATAL
            << "BUG! number of generated and possible rules do not match! N(rules) = " << fRules.size()
            << " != " << nrulesCheck << Endl;
   }
   Log() << kVERBOSE << "Number of generated rules: " << fRules.size() << Endl;

   // save initial number of rules
   fNRulesGenerated = fRules.size();

   RemoveSimilarRules();

   ResetCoefficients();

}
600 
////////////////////////////////////////////////////////////////////////////////
/// Make the linear terms as in eq 25, ref 2
/// For this the b and (1-b) quantiles are needed
///
/// For each input variable the weighted fLinQuantile and (1-fLinQuantile)
/// quantiles are found; values outside are clipped (winsorized) before the
/// normalization (eq 26) is computed. Optionally fills signal/background
/// debug histograms unless the output file is silent.
/// NOTE(review): assumes at least one training event - (*events)[0] is
/// dereferenced unconditionally; confirm callers guarantee this.

void TMVA::RuleEnsemble::MakeLinearTerms()
{
   if (!DoLinear()) return;

   const std::vector<const Event *> *events = GetTrainingEvents();
   UInt_t neve = events->size();
   UInt_t nvars = ((*events)[0])->GetNVariables(); // Event -> GetNVariables();
   Double_t val,ew;
   typedef std::pair< Double_t, Int_t> dataType;
   typedef std::pair< Double_t, dataType > dataPoint;

   std::vector< std::vector<dataPoint> > vardata(nvars);
   std::vector< Double_t > varsum(nvars,0.0);
   std::vector< Double_t > varsum2(nvars,0.0);
   // first find stats of all variables
   // vardata[v][i].first -> value of var <v> in event <i>
   // vardata[v][i].second.first -> the event weight
   // vardata[v][i].second.second -> the event type
   for (UInt_t i=0; i<neve; i++) {
      ew = ((*events)[i])->GetWeight();
      for (UInt_t v=0; v<nvars; v++) {
         val = ((*events)[i])->GetValue(v);
         vardata[v].push_back( dataPoint( val, dataType(ew,((*events)[i])->GetClass()) ) );
      }
   }
   //
   fLinDP.clear();
   fLinDM.clear();
   fLinCoefficients.clear();
   fLinNorm.clear();
   fLinDP.resize(nvars,0);
   fLinDM.resize(nvars,0);
   fLinCoefficients.resize(nvars,0);
   fLinNorm.resize(nvars,0);

   Double_t averageWeight = neve ? fRuleFit->GetNEveEff()/static_cast<Double_t>(neve) : 0;
   // sort and find limits
   Double_t stdl;

   // find normalisation given in ref 2 after eq 26
   Double_t lx;
   Double_t nquant;
   Double_t neff;
   UInt_t indquantM;
   UInt_t indquantP;

   MethodBase *fMB=const_cast<MethodBase *>(fRuleFit->GetMethodBase());

   for (UInt_t v=0; v<nvars; v++) {
      varsum[v] = 0;
      varsum2[v] = 0;
      //
      // sort by value so the weighted quantile scan below works
      std::sort( vardata[v].begin(),vardata[v].end() );
      nquant = fLinQuantile*fRuleFit->GetNEveEff(); // quantile = 0.025
      neff=0;
      UInt_t ie=0;
      // first scan for lower quantile (including weights)
      while ( (ie<neve) && (neff<nquant) ) {
         neff += vardata[v][ie].second.first;
         ie++;
      }
      indquantM = (ie==0 ? 0:ie-1);
      // now for upper quantile
      ie = neve;
      neff=0;
      while ( (ie>0) && (neff<nquant) ) {
         ie--;
         neff += vardata[v][ie].second.first;
      }
      // NOTE: the ternary also assigns ie=neve-1 in the first branch;
      // only indquantP is used afterwards, so this is a (confusing) no-op
      indquantP = (ie==neve ? ie=neve-1:ie);
      //
      fLinDM[v] = vardata[v][indquantM].first; // delta-
      fLinDP[v] = vardata[v][indquantP].first; // delta+

      if(!fMB->IsSilentFile())
      {
         if (fLinPDFB[v]) delete fLinPDFB[v];
         if (fLinPDFS[v]) delete fLinPDFS[v];
         fLinPDFB[v] = new TH1F(Form("bkgvar%d",v),"bkg temphist",40,fLinDM[v],fLinDP[v]);
         fLinPDFS[v] = new TH1F(Form("sigvar%d",v),"sig temphist",40,fLinDM[v],fLinDP[v]);
         fLinPDFB[v]->Sumw2();
         fLinPDFS[v]->Sumw2();
      }
      //
      Int_t type;
      const Double_t w = 1.0/fRuleFit->GetNEveEff();
      for (ie=0; ie<neve; ie++) {
         val = vardata[v][ie].first;
         ew = vardata[v][ie].second.first;
         type = vardata[v][ie].second.second;
         // clip to [delta-, delta+] - the winsorized value of eq 25
         lx = TMath::Min( fLinDP[v], TMath::Max( fLinDM[v], val ) );
         varsum[v] += ew*lx;
         varsum2[v] += ew*lx*lx;
         if(!fMB->IsSilentFile())
         {
            if (type==1) fLinPDFS[v]->Fill(lx,w*ew);
            else fLinPDFB[v]->Fill(lx,w*ew);
         }
      }
      //
      // Get normalization.
      //
      stdl = TMath::Sqrt( (varsum2[v] - (varsum[v]*varsum[v]/fRuleFit->GetNEveEff()))/(fRuleFit->GetNEveEff()-averageWeight) );
      fLinNorm[v] = CalcLinNorm(stdl);
   }
   // Save PDFs - for debugging purpose
   if(!fMB->IsSilentFile())
   {
      for (UInt_t v=0; v<nvars; v++) {
         fLinPDFS[v]->Write();
         fLinPDFB[v]->Write();
      }
   }
}
719 
720 ////////////////////////////////////////////////////////////////////////////////
721 /// This function returns Pr( y = 1 | x ) for the linear terms.
722 
723 Double_t TMVA::RuleEnsemble::PdfLinear( Double_t & nsig, Double_t & ntot ) const
724 {
725  UInt_t nvars=fLinDP.size();
726 
727  Double_t fstot=0;
728  Double_t fbtot=0;
729  nsig = 0;
730  ntot = nvars;
731  for (UInt_t v=0; v<nvars; v++) {
732  Double_t val = fEventLinearVal[v];
733  Int_t bin = fLinPDFS[v]->FindBin(val);
734  fstot += fLinPDFS[v]->GetBinContent(bin);
735  fbtot += fLinPDFB[v]->GetBinContent(bin);
736  }
737  if (nvars<1) return 0;
738  ntot = (fstot+fbtot)/Double_t(nvars);
739  nsig = (fstot)/Double_t(nvars);
740  return fstot/(fstot+fbtot);
741 }
742 
////////////////////////////////////////////////////////////////////////////////
/// This function returns Pr( y = 1 | x ) for rules.
/// The probability returned is normalized against the number of rules which are actually passed
///
/// \param[out] nsig  weighted number of signal events in all passed rules.
/// \param[out] ntot  weighted number of events in all passed rules.
/// \return nsig/ntot, or 0 when no rule was passed.

Double_t TMVA::RuleEnsemble::PdfRule( Double_t & nsig, Double_t & ntot ) const
{
   Double_t sump = 0;
   Double_t sumok = 0;
   Double_t sumz = 0;
   Double_t ssb;
   Double_t neve;
   //
   UInt_t nrules = fRules.size();
   for (UInt_t ir=0; ir<nrules; ir++) {
      if (fEventRuleVal[ir]>0) {
         ssb = fEventRuleVal[ir]*GetRulesConst(ir)->GetSSB(); // S/(S+B) is evaluated in CalcRuleSupport() using ALL training events
         neve = GetRulesConst(ir)->GetSSBNeve(); // number of events accepted by the rule
         sump += ssb*neve; // number of signal events
         sumok += neve; // total number of events passed
      }
      else sumz += 1.0; // all events
   }

   nsig = sump;
   ntot = sumok;
   //
   if (ntot>0) return nsig/ntot;
   return 0.0;
}
772 
////////////////////////////////////////////////////////////////////////////////
/// We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x)
/// F(x) = FL(x) + FR(x) , linear and rule part
///
/// Convenience overload: caches the given event, refreshes the per-event
/// rule/linear values and delegates to the const FStar().

Double_t TMVA::RuleEnsemble::FStar( const Event & e )
{
   SetEvent(e);
   UpdateEventVal();
   return FStar();
}
783 
784 ////////////////////////////////////////////////////////////////////////////////
785 /// We want to estimate F* = argmin Eyx( L(y,F(x) ), min wrt F(x)
786 /// F(x) = FL(x) + FR(x) , linear and rule part
787 
788 Double_t TMVA::RuleEnsemble::FStar() const
789 {
790  Double_t p=0;
791  Double_t nrs=0, nrt=0;
792  Double_t nls=0, nlt=0;
793  Double_t nt;
794  Double_t pr=0;
795  Double_t pl=0;
796 
797  // first calculate Pr(y=1|X) for rules and linear terms
798  if (DoLinear()) pl = PdfLinear(nls, nlt);
799  if (DoRules()) pr = PdfRule(nrs, nrt);
800  // nr(l)t=0 or 1
801  if ((nlt>0) && (nrt>0)) nt=2.0;
802  else nt=1.0;
803  p = (pl+pr)/nt;
804  return 2.0*p-1.0;
805 }
806 
////////////////////////////////////////////////////////////////////////////////
/// calculate various statistics for this rule
///
/// For every rule: counts how often each variable is used, classifies the
/// rule as signal or background (via IsSignalRule), and evaluates it on all
/// training events to fill the tagging probabilities fRulePTag and the
/// conditional probabilities fRulePSS/PSB/PBS/PBB. Also fills fRuleFSig
/// (fraction of signal rules) and fRuleVarFrac (fraction of rules using
/// each variable).

void TMVA::RuleEnsemble::RuleResponseStats()
{
   // TODO: NOT YET UPDATED FOR WEIGHTS
   const std::vector<const Event *> *events = GetTrainingEvents();
   const UInt_t neve = events->size();
   const UInt_t nvars = GetMethodBase()->GetNvar();
   const UInt_t nrules = fRules.size();
   const Event *eveData;
   // Flags
   Bool_t sigRule;
   Bool_t sigTag;
   Bool_t bkgTag;
   // Bool_t noTag;
   Bool_t sigTrue;
   Bool_t tagged;
   // Counters
   Int_t nsig=0;
   Int_t nbkg=0;
   Int_t ntag=0;
   Int_t nss=0;
   Int_t nsb=0;
   Int_t nbb=0;
   Int_t nbs=0;
   std::vector<Int_t> varcnt;
   // Clear vectors
   fRulePSS.clear();
   fRulePSB.clear();
   fRulePBS.clear();
   fRulePBB.clear();
   fRulePTag.clear();
   //
   varcnt.resize(nvars,0);
   fRuleVarFrac.clear();
   fRuleVarFrac.resize(nvars,0);
   //
   for ( UInt_t i=0; i<nrules; i++ ) {
      for ( UInt_t v=0; v<nvars; v++) {
         if (fRules[i]->ContainsVariable(v)) varcnt[v]++; // count how often a variable occurs
      }
      sigRule = fRules[i]->IsSignalRule();
      if (sigRule) { // rule is a signal rule (ie s/(s+b)>0.5)
         nsig++;
      }
      else {
         nbkg++;
      }
      // reset counters
      nss=0;
      nsb=0;
      nbs=0;
      nbb=0;
      ntag=0;
      // loop over all events
      for (UInt_t e=0; e<neve; e++) {
         eveData = (*events)[e];
         tagged = fRules[i]->EvalEvent(*eveData);
         sigTag = (tagged && sigRule); // it's tagged as a signal
         bkgTag = (tagged && (!sigRule)); // ... as bkg
         // noTag = !(sigTag || bkgTag); // ... not tagged
         sigTrue = (eveData->GetClass() == 0); // true if event is true signal
         if (tagged) {
            ntag++;
            if (sigTag && sigTrue) nss++;
            if (sigTag && !sigTrue) nsb++;
            if (bkgTag && sigTrue) nbs++;
            if (bkgTag && !sigTrue) nbb++;
         }
      }
      // Fill tagging probabilities
      if (ntag>0 && neve > 0) { // should always be the case, but let's make sure and keep coverity quiet
         fRulePTag.push_back(Double_t(ntag)/Double_t(neve));
         fRulePSS.push_back(Double_t(nss)/Double_t(ntag));
         fRulePSB.push_back(Double_t(nsb)/Double_t(ntag));
         fRulePBS.push_back(Double_t(nbs)/Double_t(ntag));
         fRulePBB.push_back(Double_t(nbb)/Double_t(ntag));
      }
      //
   }
   fRuleFSig = (nsig>0) ? static_cast<Double_t>(nsig)/static_cast<Double_t>(nsig+nbkg) : 0;
   for ( UInt_t v=0; v<nvars; v++) {
      fRuleVarFrac[v] = (nrules>0) ? Double_t(varcnt[v])/Double_t(nrules) : 0;
   }
}
893 
894 ////////////////////////////////////////////////////////////////////////////////
895 /// calculate various statistics for this rule
896 
897 void TMVA::RuleEnsemble::RuleStatistics()
898 {
899  const UInt_t nrules = fRules.size();
900  Double_t nc;
901  Double_t sumNc =0;
902  Double_t sumNc2=0;
903  for ( UInt_t i=0; i<nrules; i++ ) {
904  nc = static_cast<Double_t>(fRules[i]->GetNcuts());
905  sumNc += nc;
906  sumNc2 += nc*nc;
907  }
908  fRuleNCave = 0.0;
909  fRuleNCsig = 0.0;
910  if (nrules>0) {
911  fRuleNCave = sumNc/nrules;
912  fRuleNCsig = TMath::Sqrt(gTools().ComputeVariance(sumNc2,sumNc,nrules));
913  }
914 }
915 
////////////////////////////////////////////////////////////////////////////////
/// print rule generation info
///
/// Logs a summary of the rule generation: training method, tree/event counts,
/// number of rules before and after cleanup, and the cut statistics computed
/// by RuleStatistics(). Pure logging - no state is modified.

void TMVA::RuleEnsemble::PrintRuleGen() const
{
   Log() << kHEADER << "-------------------RULE ENSEMBLE SUMMARY------------------------" << Endl;
   const MethodRuleFit *mrf = GetMethodRuleFit();
   if (mrf) Log() << kINFO << "Tree training method : " << (mrf->UseBoost() ? "AdaBoost":"Random") << Endl;
   Log() << kINFO << "Number of events per tree : " << fRuleFit->GetNTreeSample() << Endl;
   Log() << kINFO << "Number of trees : " << fRuleFit->GetForest().size() << Endl;
   Log() << kINFO << "Number of generated rules : " << fNRulesGenerated << Endl;
   Log() << kINFO << "Idem, after cleanup : " << fRules.size() << Endl;
   Log() << kINFO << "Average number of cuts per rule : " << Form("%8.2f",fRuleNCave) << Endl;
   Log() << kINFO << "Spread in number of cuts per rules : " << Form("%8.2f",fRuleNCsig) << Endl;
   Log() << kVERBOSE << "Complexity : " << Form("%8.2f",fRuleNCave*fRuleNCsig) << Endl;
   Log() << kINFO << "----------------------------------------------------------------" << Endl;
   Log() << kINFO << Endl;
}
934 
////////////////////////////////////////////////////////////////////////////////
/// Print a human-readable summary of the model to the logger:
/// per-variable importances (debug only), the offset a0, the linear terms
/// (if enabled) and the first few rules ordered by decreasing importance.

void TMVA::RuleEnsemble::Print() const
{
   const EMsgType kmtype=kINFO; // severity used for the regular (non-debug) output
   const Bool_t isDebug = (fLogger->GetMinType()<=kDEBUG); // extra detail when logger is at debug level
   //
   Log() << kmtype << Endl;
   Log() << kmtype << "================================================================" << Endl;
   Log() << kmtype << " M o d e l " << Endl;
   Log() << kmtype << "================================================================" << Endl;

   Int_t ind;
   const UInt_t nvars = GetMethodBase()->GetNvar();
   const Int_t nrules = fRules.size();
   const Int_t printN = TMath::Min(10,nrules); // print at most the 10 most important rules
   // width of the longest input-variable label, used to align the columns below
   Int_t maxL = 0;
   for (UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
      if (GetMethodBase()->GetInputLabel(iv).Length() > maxL) maxL = GetMethodBase()->GetInputLabel(iv).Length();
   }
   //
   // per-variable importance table (debug only)
   if (isDebug) {
      Log() << kDEBUG << "Variable importance:" << Endl;
      for (UInt_t iv = 0; iv<fVarImportance.size(); iv++) {
         Log() << kDEBUG << std::setw(maxL) << GetMethodBase()->GetInputLabel(iv)
               << std::resetiosflags(std::ios::right)
               << " : " << Form(" %3.3f",fVarImportance[iv]) << Endl;
      }
   }
   //
   Log() << kHEADER << "Offset (a0) = " << fOffset << Endl;
   //
   // linear part of the model, if enabled
   if (DoLinear()) {
      if (fLinNorm.size() > 0) {
         Log() << kmtype << "------------------------------------" << Endl;
         Log() << kmtype << "Linear model (weights unnormalised)" << Endl;
         Log() << kmtype << "------------------------------------" << Endl;
         Log() << kmtype << std::setw(maxL) << "Variable"
               << std::resetiosflags(std::ios::right) << " : "
               << std::setw(11) << " Weights"
               << std::resetiosflags(std::ios::right) << " : "
               << "Importance"
               << std::resetiosflags(std::ios::right)
               << Endl;
         Log() << kmtype << "------------------------------------" << Endl;
         for ( UInt_t i=0; i<fLinNorm.size(); i++ ) {
            Log() << kmtype << std::setw(std::max(maxL,8)) << GetMethodBase()->GetInputLabel(i);
            if (fLinTermOK[i]) {
               // unnormalised weight = coefficient * norm; importance shown relative to fImportanceRef
               Log() << kmtype
                     << std::resetiosflags(std::ios::right)
                     << " : " << Form(" %10.3e",fLinCoefficients[i]*fLinNorm[i])
                     << " : " << Form(" %3.3f",fLinImportance[i]/fImportanceRef) << Endl;
            }
            else {
               Log() << kmtype << "-> importance below threshold = "
                     << Form(" %3.3f",fLinImportance[i]/fImportanceRef) << Endl;
            }
         }
         Log() << kmtype << "------------------------------------" << Endl;
      }
   }
   else Log() << kmtype << "Linear terms were disabled" << Endl;

   // rule part of the model
   if ((!DoRules()) || (nrules==0)) {
      if (!DoRules()) {
         Log() << kmtype << "Rule terms were disabled" << Endl;
      }
      else {
         Log() << kmtype << "Even though rules were included in the model, none passed! " << nrules << Endl;
      }
   }
   else {
      Log() << kmtype << "Number of rules = " << nrules << Endl;
      if (isDebug) {
         Log() << kmtype << "N(cuts) in rules, average = " << fRuleNCave << Endl;
         Log() << kmtype << " RMS = " << fRuleNCsig << Endl;
         Log() << kmtype << "Fraction of signal rules = " << fRuleFSig << Endl;
         Log() << kmtype << "Fraction of rules containing a variable (%):" << Endl;
         for ( UInt_t v=0; v<nvars; v++) {
            Log() << kmtype << " " << std::setw(maxL) << GetMethodBase()->GetInputLabel(v);
            Log() << kmtype << Form(" = %2.2f",fRuleVarFrac[v]*100.0) << " %" << Endl;
         }
      }
      //
      // Print out all rules sorted in importance:
      // sort ascending by importance, then traverse in reverse for descending order
      //
      std::list< std::pair<double,int> > sortedImp;
      for (Int_t i=0; i<nrules; i++) {
         sortedImp.push_back( std::pair<double,int>( fRules[i]->GetImportance(),i ) );
      }
      sortedImp.sort();
      //
      Log() << kmtype << "Printing the first " << printN << " rules, ordered in importance." << Endl;
      int pind=0;
      for ( std::list< std::pair<double,int> >::reverse_iterator itpair = sortedImp.rbegin();
            itpair != sortedImp.rend(); ++itpair ) {
         ind = itpair->second;
         // if (pind==0) impref =
         // Log() << kmtype << "Rule #" <<
         // Log() << kmtype << *fRules[ind] << Endl;
         fRules[ind]->PrintLogger(Form("Rule %4d : ",pind+1));
         pind++;
         if (pind==printN) { // stop after printN rules have been printed
            if (nrules==printN) {
               Log() << kmtype << "All rules printed" << Endl;
            }
            else {
               Log() << kmtype << "Skipping the next " << nrules-printN << " rules" << Endl;
            }
            break;
         }
      }
   }
   Log() << kmtype << "================================================================" << Endl;
   Log() << kmtype << Endl;
}
1052 
1053 ////////////////////////////////////////////////////////////////////////////////
1054 /// write rules to stream
1055 
1056 void TMVA::RuleEnsemble::PrintRaw( std::ostream & os ) const
1057 {
1058  Int_t dp = os.precision();
1059  UInt_t nrules = fRules.size();
1060  // std::sort(fRules.begin(),fRules.end());
1061  //
1062  os << "ImportanceCut= " << fImportanceCut << std::endl;
1063  os << "LinQuantile= " << fLinQuantile << std::endl;
1064  os << "AverageSupport= " << fAverageSupport << std::endl;
1065  os << "AverageRuleSigma= " << fAverageRuleSigma << std::endl;
1066  os << "Offset= " << fOffset << std::endl;
1067  os << "NRules= " << nrules << std::endl;
1068  for (UInt_t i=0; i<nrules; i++){
1069  os << "***Rule " << i << std::endl;
1070  (fRules[i])->PrintRaw(os);
1071  }
1072  UInt_t nlinear = fLinNorm.size();
1073  //
1074  os << "NLinear= " << fLinTermOK.size() << std::endl;
1075  for (UInt_t i=0; i<nlinear; i++) {
1076  os << "***Linear " << i << std::endl;
1077  os << std::setprecision(10) << (fLinTermOK[i] ? 1:0) << " "
1078  << fLinCoefficients[i] << " "
1079  << fLinNorm[i] << " "
1080  << fLinDM[i] << " "
1081  << fLinDP[i] << " "
1082  << fLinImportance[i] << " " << std::endl;
1083  }
1084  os << std::setprecision(dp);
1085 }
1086 
1087 ////////////////////////////////////////////////////////////////////////////////
1088 /// write rules to XML
1089 
1090 void* TMVA::RuleEnsemble::AddXMLTo(void* parent) const
1091 {
1092  void* re = gTools().AddChild( parent, "Weights" ); // this is the "RuleEnsemble"
1093 
1094  UInt_t nrules = fRules.size();
1095  UInt_t nlinear = fLinNorm.size();
1096  gTools().AddAttr( re, "NRules", nrules );
1097  gTools().AddAttr( re, "NLinear", nlinear );
1098  gTools().AddAttr( re, "LearningModel", (int)fLearningModel );
1099  gTools().AddAttr( re, "ImportanceCut", fImportanceCut );
1100  gTools().AddAttr( re, "LinQuantile", fLinQuantile );
1101  gTools().AddAttr( re, "AverageSupport", fAverageSupport );
1102  gTools().AddAttr( re, "AverageRuleSigma", fAverageRuleSigma );
1103  gTools().AddAttr( re, "Offset", fOffset );
1104  for (UInt_t i=0; i<nrules; i++) fRules[i]->AddXMLTo(re);
1105 
1106  for (UInt_t i=0; i<nlinear; i++) {
1107  void* lin = gTools().AddChild( re, "Linear" );
1108  gTools().AddAttr( lin, "OK", (fLinTermOK[i] ? 1:0) );
1109  gTools().AddAttr( lin, "Coeff", fLinCoefficients[i] );
1110  gTools().AddAttr( lin, "Norm", fLinNorm[i] );
1111  gTools().AddAttr( lin, "DM", fLinDM[i] );
1112  gTools().AddAttr( lin, "DP", fLinDP[i] );
1113  gTools().AddAttr( lin, "Importance", fLinImportance[i] );
1114  }
1115  return re;
1116 }
1117 
1118 ////////////////////////////////////////////////////////////////////////////////
1119 /// read rules from XML
1120 
1121 void TMVA::RuleEnsemble::ReadFromXML( void* wghtnode )
1122 {
1123  UInt_t nrules, nlinear;
1124  gTools().ReadAttr( wghtnode, "NRules", nrules );
1125  gTools().ReadAttr( wghtnode, "NLinear", nlinear );
1126  Int_t iLearningModel;
1127  gTools().ReadAttr( wghtnode, "LearningModel", iLearningModel );
1128  fLearningModel = (ELearningModel) iLearningModel;
1129  gTools().ReadAttr( wghtnode, "ImportanceCut", fImportanceCut );
1130  gTools().ReadAttr( wghtnode, "LinQuantile", fLinQuantile );
1131  gTools().ReadAttr( wghtnode, "AverageSupport", fAverageSupport );
1132  gTools().ReadAttr( wghtnode, "AverageRuleSigma", fAverageRuleSigma );
1133  gTools().ReadAttr( wghtnode, "Offset", fOffset );
1134 
1135  // read rules
1136  DeleteRules();
1137 
1138  UInt_t i = 0;
1139  fRules.resize( nrules );
1140  void* ch = gTools().GetChild( wghtnode );
1141  for (i=0; i<nrules; i++) {
1142  fRules[i] = new Rule();
1143  fRules[i]->SetRuleEnsemble( this );
1144  fRules[i]->ReadFromXML( ch );
1145 
1146  ch = gTools().GetNextChild(ch);
1147  }
1148 
1149  // read linear classifier (Fisher)
1150  fLinNorm .resize( nlinear );
1151  fLinTermOK .resize( nlinear );
1152  fLinCoefficients.resize( nlinear );
1153  fLinDP .resize( nlinear );
1154  fLinDM .resize( nlinear );
1155  fLinImportance .resize( nlinear );
1156 
1157  Int_t iok;
1158  i=0;
1159  while(ch) {
1160  gTools().ReadAttr( ch, "OK", iok );
1161  fLinTermOK[i] = (iok == 1);
1162  gTools().ReadAttr( ch, "Coeff", fLinCoefficients[i] );
1163  gTools().ReadAttr( ch, "Norm", fLinNorm[i] );
1164  gTools().ReadAttr( ch, "DM", fLinDM[i] );
1165  gTools().ReadAttr( ch, "DP", fLinDP[i] );
1166  gTools().ReadAttr( ch, "Importance", fLinImportance[i] );
1167 
1168  i++;
1169  ch = gTools().GetNextChild(ch);
1170  }
1171 }
1172 
1173 ////////////////////////////////////////////////////////////////////////////////
1174 /// read rule ensemble from stream
1175 
1176 void TMVA::RuleEnsemble::ReadRaw( std::istream & istr )
1177 {
1178  UInt_t nrules;
1179  //
1180  std::string dummy;
1181  Int_t idum;
1182  //
1183  // First block is general stuff
1184  //
1185  istr >> dummy >> fImportanceCut;
1186  istr >> dummy >> fLinQuantile;
1187  istr >> dummy >> fAverageSupport;
1188  istr >> dummy >> fAverageRuleSigma;
1189  istr >> dummy >> fOffset;
1190  istr >> dummy >> nrules;
1191  //
1192  // Now read in the rules
1193  //
1194  DeleteRules();
1195  //
1196  for (UInt_t i=0; i<nrules; i++){
1197  istr >> dummy >> idum; // read line "***Rule <ind>"
1198  fRules.push_back( new Rule() );
1199  (fRules.back())->SetRuleEnsemble( this );
1200  (fRules.back())->ReadRaw(istr);
1201  }
1202  //
1203  // and now the linear terms
1204  //
1205  UInt_t nlinear;
1206  //
1207  // coverity[tainted_data_argument]
1208  istr >> dummy >> nlinear;
1209  //
1210  fLinNorm .resize( nlinear );
1211  fLinTermOK .resize( nlinear );
1212  fLinCoefficients.resize( nlinear );
1213  fLinDP .resize( nlinear );
1214  fLinDM .resize( nlinear );
1215  fLinImportance .resize( nlinear );
1216  //
1217 
1218  Int_t iok;
1219  for (UInt_t i=0; i<nlinear; i++) {
1220  istr >> dummy >> idum;
1221  istr >> iok;
1222  fLinTermOK[i] = (iok==1);
1223  istr >> fLinCoefficients[i];
1224  istr >> fLinNorm[i];
1225  istr >> fLinDM[i];
1226  istr >> fLinDP[i];
1227  istr >> fLinImportance[i];
1228  }
1229 }
1230 
////////////////////////////////////////////////////////////////////////////////
/// Copy the state of \p other into this ensemble (self-assignment safe).
///
/// NOTE(review): fRules is a vector of Rule pointers, so this is a shallow
/// copy - both ensembles share the same Rule objects afterwards; confirm the
/// intended ownership before both sides call DeleteRules(). Also note that
/// the linear-term members (fLinNorm, fLinCoefficients, fLinTermOK, ...) are
/// not copied here - presumably intentional, but worth verifying against the
/// callers.

void TMVA::RuleEnsemble::Copy( const RuleEnsemble & other )
{
   if(this != &other) {
      fRuleFit = other.GetRuleFit();
      fRuleMinDist = other.GetRuleMinDist();
      fOffset = other.GetOffset();
      fRules = other.GetRulesConst();
      fImportanceCut = other.GetImportanceCut();
      fVarImportance = other.GetVarImportance();
      fLearningModel = other.GetLearningModel();
      fLinQuantile = other.GetLinQuantile();
      fRuleNCsig = other.fRuleNCsig;
      fAverageRuleSigma = other.fAverageRuleSigma;
      fEventCacheOK = other.fEventCacheOK;
      fImportanceRef = other.fImportanceRef;
      fNRulesGenerated = other.fNRulesGenerated;
      fRuleFSig = other.fRuleFSig;
      fRuleMapInd0 = other.fRuleMapInd0;
      fRuleMapInd1 = other.fRuleMapInd1;
      fRuleMapOK = other.fRuleMapOK;
      fRuleNCave = other.fRuleNCave;
   }
}
1257 
1258 ////////////////////////////////////////////////////////////////////////////////
1259 /// calculate the number of rules
1260 
1261 Int_t TMVA::RuleEnsemble::CalcNRules( const DecisionTree *dtree )
1262 {
1263  if (dtree==0) return 0;
1264  Node *node = dtree->GetRoot();
1265  Int_t nendnodes = 0;
1266  FindNEndNodes( node, nendnodes );
1267  return 2*(nendnodes-1);
1268 }
1269 
1270 ////////////////////////////////////////////////////////////////////////////////
1271 /// find the number of leaf nodes
1272 
1273 void TMVA::RuleEnsemble::FindNEndNodes( const Node *node, Int_t & nendnodes )
1274 {
1275  if (node==0) return;
1276  if ((node->GetRight()==0) && (node->GetLeft()==0)) {
1277  ++nendnodes;
1278  return;
1279  }
1280  const Node *nodeR = node->GetRight();
1281  const Node *nodeL = node->GetLeft();
1282  FindNEndNodes( nodeR, nendnodes );
1283  FindNEndNodes( nodeL, nendnodes );
1284 }
1285 
1286 ////////////////////////////////////////////////////////////////////////////////
1287 /// create rules from the decision tree structure
1288 
1289 void TMVA::RuleEnsemble::MakeRulesFromTree( const DecisionTree *dtree )
1290 {
1291  Node *node = dtree->GetRoot();
1292  AddRule( node );
1293 }
1294 
1295 ////////////////////////////////////////////////////////////////////////////////
1296 /// add a new rule to the tree
1297 
1298 void TMVA::RuleEnsemble::AddRule( const Node *node )
1299 {
1300  if (node==0) return;
1301  if (node->GetParent()==0) { // it's a root node, don't make a rule
1302  AddRule( node->GetRight() );
1303  AddRule( node->GetLeft() );
1304  }
1305  else {
1306  Rule *rule = MakeTheRule(node);
1307  if (rule) {
1308  fRules.push_back( rule );
1309  AddRule( node->GetRight() );
1310  AddRule( node->GetLeft() );
1311  }
1312  else {
1313  Log() << kFATAL << "<AddRule> - ERROR failed in creating a rule! BUG!" << Endl;
1314  }
1315  }
1316 }
1317 
1318 ////////////////////////////////////////////////////////////////////////////////
1319 /// Make a Rule from a given Node.
1320 /// The root node (ie no parent) does not generate a Rule.
1321 /// The first node in a rule is always the root node => fNodes.size()>=2
1322 /// Each node corresponds to a cut and the cut value is given by the parent node.
1323 
1324 TMVA::Rule *TMVA::RuleEnsemble::MakeTheRule( const Node *node )
1325 {
1326  if (node==0) {
1327  Log() << kFATAL << "<MakeTheRule> Input node is NULL. Should not happen. BUG!" << Endl;
1328  return 0;
1329  }
1330 
1331  if (node->GetParent()==0) { // a root node - ignore
1332  return 0;
1333  }
1334  //
1335  std::vector< const Node * > nodeVec;
1336  const Node *parent = node;
1337  //
1338  // Make list with the input node at the end:
1339  // <root node> <node1> <node2> ... <node given as argument>
1340  //
1341  nodeVec.push_back( node );
1342  while (parent!=0) {
1343  parent = parent->GetParent();
1344  if (!parent) continue;
1345  const DecisionTreeNode* dtn = dynamic_cast<const DecisionTreeNode*>(parent);
1346  if (dtn && dtn->GetSelector()>=0)
1347  nodeVec.insert( nodeVec.begin(), parent );
1348 
1349  }
1350  if (nodeVec.size()<2) {
1351  Log() << kFATAL << "<MakeTheRule> BUG! Inconsistent Rule!" << Endl;
1352  return 0;
1353  }
1354  Rule *rule = new Rule( this, nodeVec );
1355  rule->SetMsgType( Log().GetMinType() );
1356  return rule;
1357 }
1358 
1359 ////////////////////////////////////////////////////////////////////////////////
1360 /// Makes rule map for all events
1361 
1362 void TMVA::RuleEnsemble::MakeRuleMap(const std::vector<const Event *> *events, UInt_t ifirst, UInt_t ilast)
1363 {
1364  Log() << kVERBOSE << "Making Rule map for all events" << Endl;
1365  // make rule response map
1366  if (events==0) events = GetTrainingEvents();
1367  if ((ifirst==0) || (ilast==0) || (ifirst>ilast)) {
1368  ifirst = 0;
1369  ilast = events->size()-1;
1370  }
1371  // check if identical to previous call
1372  if ((events!=fRuleMapEvents) ||
1373  (ifirst!=fRuleMapInd0) ||
1374  (ilast !=fRuleMapInd1)) {
1375  fRuleMapOK = kFALSE;
1376  }
1377  //
1378  if (fRuleMapOK) {
1379  Log() << kVERBOSE << "<MakeRuleMap> Map is already valid" << Endl;
1380  return; // already cached
1381  }
1382  fRuleMapEvents = events;
1383  fRuleMapInd0 = ifirst;
1384  fRuleMapInd1 = ilast;
1385  // check number of rules
1386  UInt_t nrules = GetNRules();
1387  if (nrules==0) {
1388  Log() << kVERBOSE << "No rules found in MakeRuleMap()" << Endl;
1389  fRuleMapOK = kTRUE;
1390  return;
1391  }
1392  //
1393  // init map
1394  //
1395  std::vector<UInt_t> ruleind;
1396  fRuleMap.clear();
1397  for (UInt_t i=ifirst; i<=ilast; i++) {
1398  ruleind.clear();
1399  fRuleMap.push_back( ruleind );
1400  for (UInt_t r=0; r<nrules; r++) {
1401  if (fRules[r]->EvalEvent(*((*events)[i]))) {
1402  fRuleMap.back().push_back(r); // save only rules that are accepted
1403  }
1404  }
1405  }
1406  fRuleMapOK = kTRUE;
1407  Log() << kVERBOSE << "Made rule map for event# " << ifirst << " : " << ilast << Endl;
1408 }
1409 
////////////////////////////////////////////////////////////////////////////////
/// std::ostream operator - deprecated. Note that the actual model summary is
/// written via the TMVA logger (Print()), not to the passed stream.

std::ostream& TMVA::operator<< ( std::ostream& os, const RuleEnsemble & rules )
{
   os << "DON'T USE THIS - TO BE REMOVED" << std::endl;
   rules.Print(); // output goes to the logger, not to 'os'
   return os;
}