Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodCuts.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Matt Jachowski, Peter Speckmayer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodCuts *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multivariate optimisation of signal efficiency for given background *
12  * efficiency, using rectangular minimum and maximum requirements on *
13  * input variables *
14  * *
15  * Authors (alphabetical): *
16  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
17  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
18  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * LAPP, Annecy, France *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodCuts
34 #define ROOT_TMVA_MethodCuts
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodCuts //
39 // //
40 // Multivariate optimisation of signal efficiency for given background //
41 // efficiency, using rectangular minimum and maximum requirements on //
42 // input variables //
43 // //
44 //////////////////////////////////////////////////////////////////////////
45 
46 #include <vector>
47 #include <map>
48 
49 #include "TMVA/MethodBase.h"
50 #include "TMVA/BinarySearchTree.h"
51 #include "TMVA/PDF.h"
52 #include "TMatrixDfwd.h"
53 #include "IFitterTarget.h"
54 
55 class TRandom;
56 
57 namespace TMVA {
58 
59  class Interval;
60 
61  class MethodCuts : public MethodBase, public IFitterTarget {
62  // optimises signal efficiency for a given background efficiency using rectangular min/max cuts on the input variables
63  public:
64 
65  MethodCuts( const TString& jobName,
66  const TString& methodTitle,
67  DataSetInfo& theData,
68  const TString& theOption = "MC:150:10000:");
69 
70  MethodCuts( DataSetInfo& theData,
71  const TString& theWeightFile);
72 
73  // workaround needed because CINT cannot handle dynamic casts; yields a null pointer if "method" is not a MethodCuts
74  static MethodCuts* DynamicCast( IMethod* method ) { return dynamic_cast<MethodCuts*>(method); }
75 
76  virtual ~MethodCuts( void );
77 
78  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
79 
80  // training method
81  void Train( void );
82  // keep the MethodBase overloads of ReadWeightsFromStream visible alongside the std::istream overload declared below
83  using MethodBase::ReadWeightsFromStream;
84 
85  void AddWeightsXMLTo ( void* parent ) const;
86 
87  void ReadWeightsFromStream( std::istream & i );
88  void ReadWeightsFromXML ( void* wghtnode );
89 
90  // calculate the MVA value (for CUTs this is just a dummy)
91  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
92 
93  // write method specific histos to target file
94  void WriteMonitoringHistosToFile( void ) const;
95 
96  // test the method
97  void TestClassification();
98 
99  // also overridden --> not computed for cuts (the inline versions below simply return -1)
100  Double_t GetSeparation ( TH1*, TH1* ) const { return -1; }
101  Double_t GetSeparation ( PDF* = 0, PDF* = 0 ) const { return -1; }
102  Double_t GetSignificance( void ) const { return -1; }
103  Double_t GetmuTransform ( TTree *) { return -1; }
104  Double_t GetEfficiency ( const TString&, Types::ETreeType, Double_t& );
105  Double_t GetTrainingEfficiency(const TString& );
106 
107  // rarity distributions (signal or background (default) is uniform in [0,1]); this implementation always returns 0
108  Double_t GetRarity( Double_t, Types::ESBType ) const { return 0; }
109 
110  // accessors for Minuit
111  Double_t ComputeEstimator( std::vector<Double_t> & );
112  // NOTE(review): the vector overload below presumably implements the IFitterTarget interface - confirm
113  Double_t EstimatorFunction( std::vector<Double_t> & );
114  Double_t EstimatorFunction( Int_t ievt1, Int_t ievt2 );
115 
116  void SetTestSignalEfficiency( Double_t effS ) { fTestSignalEff = effS; }
117 
118  // retrieve cut values for given signal efficiency (NOTE(review): meaning of the Double_t returned by GetCuts is not visible in this header - confirm)
119  void PrintCuts( Double_t effS ) const;
120  Double_t GetCuts ( Double_t effS, std::vector<Double_t>& cutMin, std::vector<Double_t>& cutMax ) const;
121  Double_t GetCuts ( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const;
122 
123  // ranking of input variables (not available for cuts, hence a null pointer is returned)
124  const Ranking* CreateRanking() { return 0; }
125 
126  void DeclareOptions();
127  void ProcessOptions();
128 
129  // maximum |cut| value
130  static const Double_t fgMaxAbsCutVal;
131 
132  // no check of options at this place (intentionally empty)
133  void CheckSetup() {}
134 
135  protected:
136 
137  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
138  void MakeClassSpecific( std::ostream&, const TString& ) const;
139 
140  // get help message text
141  void GetHelpMessage() const;
142 
143  private:
144 
145  // optimisation method
146  enum EFitMethodType { kUseMonteCarlo = 0,
147  kUseGeneticAlgorithm,
148  kUseSimulatedAnnealing,
149  kUseMinuit,
150  kUseEventScan,
151  kUseMonteCarloEvents };
152 
153  // efficiency calculation method
154  // - kUseEventSelection: computes efficiencies from given data sample
155  // - kUsePDFs : creates smoothed PDFs from data samples, and
156  // uses these to compute efficiencies
157  enum EEffMethod { kUseEventSelection = 0,
158  kUsePDFs };
159 
160  // improve the Monte Carlo by providing some additional information
161  enum EFitParameters { kNotEnforced = 0,
162  kForceMin,
163  kForceMax,
164  kForceSmart };
165 
166  // general
167  TString fFitMethodS; // chosen fit method (string)
168  EFitMethodType fFitMethod; // chosen fit method
169  TString fEffMethodS; // chosen efficiency calculation method (string)
170  EEffMethod fEffMethod; // chosen efficiency calculation method
171  std::vector<EFitParameters>* fFitParams; // vector for series of fit methods
172  Double_t fTestSignalEff; // used to test optimized signal efficiency (set via SetTestSignalEfficiency)
173  Double_t fEffSMin; // NOTE(review): old comment was copy-pasted from fTestSignalEff; presumably lower edge of the signal-efficiency range - confirm
174  Double_t fEffSMax; // NOTE(review): old comment was copy-pasted from fTestSignalEff; presumably upper edge of the signal-efficiency range - confirm
175  Double_t* fCutRangeMin; // minimum of allowed cut range
176  Double_t* fCutRangeMax; // maximum of allowed cut range
177  std::vector<Interval*> fCutRange; // allowed ranges for cut optimisation
178 
179  // for the use of the binary tree method
180  BinarySearchTree* fBinaryTreeS; // presumably the tree for signal events (S suffix as in fMeanS) - confirm
181  BinarySearchTree* fBinaryTreeB; // presumably the tree for background events (B suffix as in fMeanB) - confirm
182 
183  // MC method
184  Double_t** fCutMin; // minimum requirement
185  Double_t** fCutMax; // maximum requirement
186  Double_t* fTmpCutMin; // temporary minimum requirement
187  Double_t* fTmpCutMax; // temporary maximum requirement
188  TString* fAllVarsI; // what to do with variables
189 
190  // relevant for all methods
191  Int_t fNpar; // number of parameters in fit (default: 2*Nvar)
192  Double_t fEffRef; // reference efficiency
193  std::vector<Int_t>* fRangeSign; // used to match cuts to fit parameters (and vice versa)
194  TRandom* fRandom; // random generator for MC optimisation method
195 
196  // basic statistics
197  std::vector<Double_t>* fMeanS; // means of variables (signal)
198  std::vector<Double_t>* fMeanB; // means of variables (background)
199  std::vector<Double_t>* fRmsS; // RMSs of variables (signal)
200  std::vector<Double_t>* fRmsB; // RMSs of variables (background)
201 
202  TH1* fEffBvsSLocal; // intermediate histogram of background efficiency versus signal efficiency
203 
204  // PDF section
205  std::vector<TH1*>* fVarHistS; // reference histograms (signal)
206  std::vector<TH1*>* fVarHistB; // reference histograms (background)
207  std::vector<TH1*>* fVarHistS_smooth; // smoothed reference histograms (signal)
208  std::vector<TH1*>* fVarHistB_smooth; // smoothed reference histograms (background)
209  std::vector<PDF*>* fVarPdfS; // reference PDFs (signal)
210  std::vector<PDF*>* fVarPdfB; // reference PDFs (background)
211 
212  // negative efficiencies
213  Bool_t fNegEffWarning; // flag raised in case of negative efficiency warning
214 
215 
216  // the definition of fit parameters can be different from the actual
217  // cut requirements; these functions provide the matching
218  void MatchParsToCuts( const std::vector<Double_t>&, Double_t*, Double_t* );
219  void MatchParsToCuts( Double_t*, Double_t*, Double_t* );
220 
221  void MatchCutsToPars( std::vector<Double_t>&, Double_t*, Double_t* );
222  void MatchCutsToPars( std::vector<Double_t>&, Double_t**, Double_t**, Int_t ibin );
223 
224  // creates PDFs in case these are used to compute efficiencies
225  // (corresponds to: EffMethod == kUsePDFs)
226  void CreateVariablePDFs( void );
227 
228  // returns signal and background efficiencies (via effS, effB) for given cuts - using event counting
229  void GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
230  Double_t& effS, Double_t& effB );
231  // returns signal and background efficiencies (via effS, effB) for given cuts - using PDFs
232  void GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
233  Double_t& effS, Double_t& effB );
234 
235  // default initialisation method called by all constructors
236  void Init( void );
237 
238  ClassDef(MethodCuts,0); // Multivariate optimisation of signal efficiency
239  };
240 
241 } // namespace TMVA
242 
243 #endif