MethodDT.h
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis
 * Package: TMVA
 * Class  : MethodDT (Boosted Decision Trees)
 * Web    : http://tmva.sourceforge.net
 *
 * Description:
 *      Analysis of Boosted Decision Trees
 *
 * Authors (alphabetical):
 *      Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland
 *      Helge Voss      <Helge.Voss@cern.ch>     - MPI-K Heidelberg, Germany
 *      Or Cohen        <orcohenor@gmail.com>    - Weizmann Inst., Israel
 *
 * Copyright (c) 2005:
 *      CERN, Switzerland
 *      MPI-K Heidelberg, Germany
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted according to the terms listed in LICENSE
 * (http://tmva.sourceforge.net/LICENSE)
 **********************************************************************************/

#ifndef ROOT_TMVA_MethodDT
#define ROOT_TMVA_MethodDT

//////////////////////////////////////////////////////////////////////////
//
// MethodDT
//
// Analysis of Single Decision Tree
//
//////////////////////////////////////////////////////////////////////////

#include <vector>
#include "TH1.h"
#include "TH2.h"
#include "TTree.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DecisionTree.h"
#include "TMVA/Event.h"

namespace TMVA {

   class MethodBoost;

   class MethodDT : public MethodBase {

   public:

      MethodDT( const TString& jobName,
                const TString& methodTitle,
                DataSetInfo& theData,
                const TString& theOption = "" );

      MethodDT( DataSetInfo& dsi,
                const TString& theWeightFile );

      virtual ~MethodDT( void );

      virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );

      void Train( void );

      using MethodBase::ReadWeightsFromStream;

      // write weights to file
      void AddWeightsXMLTo( void* parent ) const;

      // read weights from file
      void ReadWeightsFromStream( std::istream& istr );
      void ReadWeightsFromXML   ( void* wghtnode );

      // calculate the MVA value
      Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );

      // the option handling methods
      void DeclareOptions();
      void ProcessOptions();
      void DeclareCompatibilityOptions();

      void GetHelpMessage() const;

      // ranking of input variables
      const Ranking* CreateRanking();

      Double_t PruneTree( );

      Double_t TestTreeQuality( DecisionTree* dt );

      Double_t GetPruneStrength() { return fPruneStrength; }

      void SetMinNodeSize( Double_t sizeInPercent );
      void SetMinNodeSize( TString sizeInPercent );

      Int_t GetNNodesBeforePruning() { return fTree->GetNNodesBeforePruning(); }
      Int_t GetNNodes()              { return fTree->GetNNodes(); }

   private:

      // Init used in the various constructors
      void Init( void );

   private:

      std::vector<Event*> fEventSample;   // the training events

      DecisionTree* fTree;                // the decision tree
      // options for the decision tree
      SeparationBase* fSepType;           // the separation criterion used in node splitting
      TString fSepTypeS;                  // the separation criterion (option string) used in node splitting
      Int_t fMinNodeEvents;               // minimum number of events in a node
      Float_t fMinNodeSize;               // minimum percentage of training events in a node
      TString fMinNodeSizeS;              // string containing the minimum percentage of training events in a node

      Int_t fNCuts;                       // number of grid points used when scanning for the optimal cut in node splitting
      Bool_t fUseYesNoLeaf;               // classify leaf nodes as pure signal/background (yes/no) or by their signal/background purity
      Double_t fNodePurityLimit;          // purity limit for signal/background nodes
      UInt_t fMaxDepth;                   // maximum tree depth

      Double_t fErrorFraction;            // ntuple variable: misclassification error fraction
      Double_t fPruneStrength;            // a parameter that sets the "amount" of pruning; needs to be adjusted
      DecisionTree::EPruneMethod fPruneMethod; // method used for pruning
      TString fPruneMethodS;              // prune method option string
      Bool_t fAutomatic;                  // use the user-given prune strength, or one determined automatically from a validation sample
      Bool_t fRandomisedTrees;            // choose a random subset of possible cut variables at each node during training
      Int_t fUseNvars;                    // the number of variables used in the randomised tree splitting
      Bool_t fUsePoissonNvars;            // interpret fUseNvars as a Poisson mean and draw the actual number of variables from that distribution at each split
      std::vector<Double_t> fVariableImportance; // the relative importance of the different variables

      Double_t fDeltaPruneStrength;       // step size in pruning; adjusted according to the experience of previous trees
      // debugging flags
      static const Int_t fgDebugLevel = 0; // debug level determining some printout/control plots etc.

      Bool_t fPruneBeforeBoost;           // legacy variable, only needed for "CompatibilityOptions"

      ClassDef(MethodDT,0);               // Analysis of Decision Trees

   };
}

#endif
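
For context, a minimal usage sketch follows; it is not part of the header. MethodDT is normally booked through the TMVA Factory with Types::kDT (stand-alone, or as the base classifier of MethodBoost) rather than constructed directly. The function name, tree and variable names, and option values below are illustrative assumptions; the option names mirror the data members declared above, but consult the TMVA Users Guide for the exact spelling supported by your ROOT version.

// Sketch: booking a single decision tree ("DT") via the TMVA Factory.
// Tree/variable names and option values are placeholders, not prescriptions.
#include "TFile.h"
#include "TTree.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/Types.h"

void TrainSingleDecisionTree( TTree* sigTree, TTree* bkgTree )
{
   TFile* outputFile = TFile::Open( "TMVA_DT.root", "RECREATE" );

   TMVA::Factory factory( "TMVAClassification", outputFile,
                          "!V:AnalysisType=Classification" );
   TMVA::DataLoader loader( "dataset" );

   // register input variables and trees (names are assumptions)
   loader.AddVariable( "var1", 'F' );
   loader.AddVariable( "var2", 'F' );
   loader.AddSignalTree    ( sigTree, 1.0 );
   loader.AddBackgroundTree( bkgTree, 1.0 );
   loader.PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

   // book a single decision tree; options correspond to fNCuts, fMaxDepth,
   // fMinNodeSize declared in MethodDT.h (values here are illustrative)
   factory.BookMethod( &loader, TMVA::Types::kDT, "DT",
                       "nCuts=20:MaxDepth=3:MinNodeSize=5%" );

   factory.TrainAllMethods();
   factory.TestAllMethods();
   factory.EvaluateAllMethods();

   outputFile->Close();
}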