Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodPDERS.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Multidimensional Likelihood using the "Probability density estimator *
12  * range search" (PDERS) method suggested in *
13  * T. Carli and B. Koblitz, NIM A 501, 576 (2003) *
14  * *
15  * The multidimensional PDFs for signal and background are modeled *
16  * by counting the events in the "vicinity" of a test point. The volume *
17  * that describes "vicinity" is user-defined through the option string. *
18  * A search method based on binary-trees is used to improve the selection *
19  * efficiency of the volume search. *
20  * *
21  * Authors (alphabetical): *
22  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
23  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
24  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
25  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
26  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
27  * *
28  * Copyright (c) 2005: *
29  * CERN, Switzerland *
30  * U. of Victoria, Canada *
31  * MPI-K Heidelberg, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 #ifndef ROOT_TMVA_MethodPDERS
39 #define ROOT_TMVA_MethodPDERS
40 
41 //////////////////////////////////////////////////////////////////////////
42 // //
43 // MethodPDERS //
44 // //
45 // Multidimensional Likelihood using the "Probability density //
46 // estimator range search" (PDERS) method //
47 // //
48 //////////////////////////////////////////////////////////////////////////
49 
50 #include "TMVA/MethodBase.h"
51 #include "TMVA/BinarySearchTree.h"
52 #include "TVector.h"
53 
54 namespace TMVA {
55 
56  class Volume;
57  class Event;
58 
59  class MethodPDERS : public MethodBase {
60  // Multidimensional likelihood method using the "probability density estimator range search" (PDERS)
61  public:
62  // constructor taking job name, method title, data set info and option string
63  MethodPDERS( const TString& jobName,
64  const TString& methodTitle,
65  DataSetInfo& theData,
66  const TString& theOption);
67  // constructor taking data set info and a weight file
68  MethodPDERS( DataSetInfo& theData,
69  const TString& theWeightFile);
70 
71  virtual ~MethodPDERS( void );
72  // query support for an analysis type with the given number of classes and targets
73  virtual Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
74 
75 
76  // training method
77  void Train( void );
78 
79  // write weights: to a ROOT file (TFile) or as XML below the given parent node
80  void WriteWeightsToStream( TFile& rf ) const;
81  void AddWeightsXMLTo( void* parent ) const;
82 
83  // read weights: from an input stream, a ROOT file (TFile) or an XML node
84  void ReadWeightsFromStream( std::istream& istr );
85  void ReadWeightsFromStream( TFile& istr );
86  void ReadWeightsFromXML( void* wghtnode );
87 
88  // calculate the MVA value; err and errUpper are optional and default to null
89  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
90 
91  // get the regression values
92  const std::vector<Float_t>& GetRegressionValues();
93  public:
94 
95  // for root finder: a static function and a member function with the same signature
96  static Double_t IGetVolumeContentForRoot( Double_t );
97  Double_t GetVolumeContentForRoot( Double_t );
98 
99  // static accessor for a MethodPDERS pointer (presumably the one held by GetMethodPDERSThreadLocal() - TODO confirm)
100  static MethodPDERS* ThisPDERS( void );
101 
102  protected:
103 
104  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
105  void MakeClassSpecific( std::ostream&, const TString& ) const;
106 
107  // get help message text
108  void GetHelpMessage() const;
109 
110  Volume* fHelpVolume; // auxiliary variable
111  Int_t fFcnCall; // number of external function calls (RootFinder)
112 
113  // accessor for the binary search tree pointer (fBinaryTree)
114  BinarySearchTree* GetBinaryTree( void ) const { return fBinaryTree; }
115  // kernel estimates: CKernelEstimate returns a Double_t, RKernelEstimate takes a pdfSum vector pointer (presumably classification vs. regression - TODO confirm)
116  Double_t CKernelEstimate( const Event&, std::vector<const BinarySearchTreeNode*>&, Volume& );
117  void RKernelEstimate( const Event&, std::vector<const BinarySearchTreeNode*>&, Volume&, std::vector<Float_t> *pdfSum );
118  // kernel function helpers
119  Double_t ApplyKernelFunction( Double_t normalized_distance );
120  Double_t KernelNormalization( Double_t pdf );
121  Double_t GetNormalizedDistance( const TMVA::Event &base_event,
122  const BinarySearchTreeNode &sample_event,
123  Double_t *dim_normalization);
124  Double_t NormSinc( Double_t x );
125  Double_t LanczosFilter( Int_t level, Double_t x );
126 
127  // ranking of input variables: none is provided (returns a null pointer)
128  const Ranking* CreateRanking() { return 0; }
129 
130  private:
131 
132  // the option handling methods
133  void DeclareOptions();
134  void ProcessOptions();
135 
136  // calculate the averages of the input variables needed for adaptive training
137  void CalcAverages();
138 
139  // create binary search trees for signal and background
140  void CreateBinarySearchTree( Types::ETreeType type );
141 
142  // get sample of training events
143  void GetSample( const Event &e, std::vector<const BinarySearchTreeNode*>& events, Volume *volume);
144 
145  // option strings
146  TString fVolumeRange; // option volume range
147  TString fKernelString; // option kernel estimator
148  // volume range modes (presumably the decoded form of fVolumeRange - TODO confirm)
149  enum EVolumeRangeMode {
150  kUnsupported = 0,
151  kMinMax,
152  kRMS,
153  kAdaptive,
154  kUnscaled,
155  kkNN
156  } fVRangeMode;
157  // kernel estimators (presumably the decoded form of fKernelString - TODO confirm)
158  enum EKernelEstimator {
159  kBox = 0,
160  kSphere,
161  kTeepee,
162  kGauss,
163  kSinc3, // the sinc enumerators must be consecutive and in order!
164  kSinc5,
165  kSinc7,
166  kSinc9,
167  kSinc11,
168  kLanczos2,
169  kLanczos3,
170  kLanczos5,
171  kLanczos8,
172  kTrim
173  } fKernelEstimator;
174 
175  BinarySearchTree* fBinaryTree; // binary tree
176 
177  std::vector<Float_t>* fDelta; // size of volume
178  std::vector<Float_t>* fShift; // volume center
179  std::vector<Float_t> fAverageRMS; // average RMS of signal and background
180 
181  Float_t fScaleS; // weight for signal events
182  Float_t fScaleB; // weight for background events
183  Float_t fDeltaFrac; // fraction of RMS
184  Double_t fGaussSigma; // size of Gauss in adaptive volume
185  Double_t fGaussSigmaNorm;// size of Gauss in adaptive volume (normalised to dimensions)
186 
187  Double_t fNRegOut; // number of output dimensions for regression (NOTE(review): a count held in a Double_t - confirm this is intended)
188 
189  // input for adaptive volume adjustment
190  Float_t fNEventsMin; // minimum number of events in adaptive volume
191  Float_t fNEventsMax; // maximum number of events in adaptive volume
192  Float_t fMaxVIterations;// maximum number of iterations to adapt volume size
193  Float_t fInitialScale; // initial scale for adaptive volume
194 
195  Bool_t fInitializedVolumeEle; // is volume element initialized ?
196 
197  Int_t fkNNMin; // min number of events in kNN tree
198  Int_t fkNNMax; // max number of events in kNN tree
199 
200  Double_t fMax_distance; // maximum distance
201  Bool_t fPrinted; // print flag (NOTE(review): exact meaning is not visible in this header - see the implementation)
202  Bool_t fNormTree; // binary-search tree is normalised
203 
204  void SetVolumeElement ( void );
205 
206  Double_t CRScalc ( const Event& );
207  void RRScalc ( const Event&, std::vector<Float_t>* count );
208 
209  Float_t GetError ( Float_t countS, Float_t countB,
210  Float_t sumW2S, Float_t sumW2B ) const;
211 
212  // Workaround for OSX, where static thread_local data members are not supported:
213  // the MethodPDERS pointer is kept in a function-local TTHREAD_TLS variable and handed out by reference.
214  static MethodPDERS*& GetMethodPDERSThreadLocal() {TTHREAD_TLS(MethodPDERS*) fgThisPDERS(nullptr); return fgThisPDERS;};
215  void UpdateThis();
216 
217  void Init( void );
218 
219  ClassDef(MethodPDERS,0); // Multi-dimensional probability density estimator range search (PDERS) method
220  };
221 
222 } // namespace TMVA
223 
224 #endif // ROOT_TMVA_MethodPDERS