Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodPDERS.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /***********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  ***********************************************************************************/
31 
32 /*! \class TMVA::MethodPDERS
33 \ingroup TMVA
34 
35 This is a generalization of the above Likelihood methods to \f$ N_{var} \f$
36 dimensions, where \f$ N_{var} \f$ is the number of input variables
37 used in the MVA. If the multi-dimensional probability density functions
38 (PDFs) for signal and background were known, this method contains the entire
39 physical information, and is therefore optimal. Usually, kernel estimation
40 methods are used to approximate the PDFs using the events from the
41 training sample.
42 
43 A very simple probability density estimator (PDE) has been suggested
44 in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45 PDE for a given test event is obtained from counting the (normalized)
46 number of signal and background (training) events that occur in the
47 "vicinity" of the test event. The volume that describes "vicinity" is
48 user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49 is used to effectively reduce the
50 selection time for the range search. Three different volume definitions
51 are optional:
52 
53  - *MinMax:* the volume is defined in each dimension with respect
54  to the full variable range found in the training sample.
55  - *RMS:* the volume is defined in each dimensions with respect
56  to the RMS estimated from the training sample.
57  - *Adaptive:* a volume element is defined in each dimensions with
58  respect to the RMS estimated from the training sample. The overall
59  scale of the volume element is then determined for each event so
60  that the total number of events confined in the volume be within
61  a user-defined range.
62 
63 The adaptive range search is used by default.
64 */
65 
66 #include "TMVA/MethodPDERS.h"
67 
68 #include "TMVA/BinaryTree.h"
69 #include "TMVA/BinarySearchTree.h"
70 #include "TMVA/Configurable.h"
71 #include "TMVA/ClassifierFactory.h"
72 #include "TMVA/Event.h"
73 #include "TMVA/IMethod.h"
74 #include "TMVA/MethodBase.h"
75 #include "TMVA/MsgLogger.h"
76 #include "TMVA/RootFinder.h"
77 #include "TMVA/Tools.h"
79 #include "TMVA/Types.h"
80 
81 #include "ThreadLocalStorage.h"
82 #include "TBuffer.h"
83 #include "TFile.h"
84 #include "TObjString.h"
85 #include "TMath.h"
86 
87 #include <assert.h>
88 #include <algorithm>
89 
90 namespace TMVA {
91  const Bool_t MethodPDERS_UseFindRoot = kFALSE;
92 };
93 
94 
95 REGISTER_METHOD(PDERS)
96 
97 ClassImp(TMVA::MethodPDERS);
98 
99 ////////////////////////////////////////////////////////////////////////////////
100 /// standard constructor for the PDERS method
101 
102  TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
103  const TString& methodTitle,
104  DataSetInfo& theData,
105  const TString& theOption) :
106  MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
107  fFcnCall(0),
108  fVRangeMode(kAdaptive),
109  fKernelEstimator(kBox),
110  fDelta(0),
111  fShift(0),
112  fScaleS(0),
113  fScaleB(0),
114  fDeltaFrac(0),
115  fGaussSigma(0),
116  fGaussSigmaNorm(0),
117  fNRegOut(0),
118  fNEventsMin(0),
119  fNEventsMax(0),
120  fMaxVIterations(0),
121  fInitialScale(0),
122  fInitializedVolumeEle(0),
123  fkNNMin(0),
124  fkNNMax(0),
125  fMax_distance(0),
126  fPrinted(0),
127  fNormTree(0)
128 {
129  fHelpVolume = NULL;
130  fBinaryTree = NULL;
131 }
132 
133 ////////////////////////////////////////////////////////////////////////////////
134 /// construct MethodPDERS through from file
135 
136 TMVA::MethodPDERS::MethodPDERS( DataSetInfo& theData,
137  const TString& theWeightFile) :
138  MethodBase( Types::kPDERS, theData, theWeightFile),
139  fFcnCall(0),
140  fVRangeMode(kAdaptive),
141  fKernelEstimator(kBox),
142  fDelta(0),
143  fShift(0),
144  fScaleS(0),
145  fScaleB(0),
146  fDeltaFrac(0),
147  fGaussSigma(0),
148  fGaussSigmaNorm(0),
149  fNRegOut(0),
150  fNEventsMin(0),
151  fNEventsMax(0),
152  fMaxVIterations(0),
153  fInitialScale(0),
154  fInitializedVolumeEle(0),
155  fkNNMin(0),
156  fkNNMax(0),
157  fMax_distance(0),
158  fPrinted(0),
159  fNormTree(0)
160 {
161  fHelpVolume = NULL;
162  fBinaryTree = NULL;
163 }
164 
165 ////////////////////////////////////////////////////////////////////////////////
166 /// PDERS can handle classification with 2 classes and regression with one or more regression-targets
167 
168 Bool_t TMVA::MethodPDERS::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
169 {
170  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
171  if (type == Types::kRegression) return kTRUE;
172  return kFALSE;
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// default initialisation routine called by all constructors
177 
178 void TMVA::MethodPDERS::Init( void )
179 {
180  fBinaryTree = NULL;
181 
182  UpdateThis();
183 
184  // default options
185  fDeltaFrac = 3.0;
186  fVRangeMode = kAdaptive;
187  fKernelEstimator = kBox;
188 
189  // special options for Adaptive mode
190  fNEventsMin = 100;
191  fNEventsMax = 200;
192  fMaxVIterations = 150;
193  fInitialScale = 0.99;
194  fGaussSigma = 0.1;
195  fNormTree = kFALSE;
196 
197  fkNNMin = Int_t(fNEventsMin);
198  fkNNMax = Int_t(fNEventsMax);
199 
200  fInitializedVolumeEle = kFALSE;
201  fAverageRMS.clear();
202 
203  // the minimum requirement to declare an event signal-like
204  SetSignalReferenceCut( 0.0 );
205 }
206 
207 ////////////////////////////////////////////////////////////////////////////////
208 /// destructor
209 
210 TMVA::MethodPDERS::~MethodPDERS( void )
211 {
212  if (fDelta) delete fDelta;
213  if (fShift) delete fShift;
214 
215  if (NULL != fBinaryTree) delete fBinaryTree;
216 }
217 
218 ////////////////////////////////////////////////////////////////////////////////
219 /// define the options (their key words) that can be set in the option string.
220 ///
221 /// know options:
222 /// - VolumeRangeMode <string> Method to determine volume range
223 /// available values are:
224 /// - MinMax
225 /// - Unscaled
226 /// - RMS
227 /// - kNN
228 /// - Adaptive <default>
229 ///
230 /// - KernelEstimator <string> Kernel estimation function
231 /// available values are:
232 /// - Box <default>
233 /// - Sphere
234 /// - Teepee
235 /// - Gauss
236 /// - Sinc3
237 /// - Sinc5
238 /// - Sinc7
239 /// - Sinc9
240 /// - Sinc11
241 /// - Lanczos2
242 /// - Lanczos3
243 /// - Lanczos5
244 /// - Lanczos8
245 /// - Trim
246 ///
247 /// - DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
248 /// - NEventsMin <int> Minimum number of events for adaptive volume range
249 /// - NEventsMax <int> Maximum number of events for adaptive volume range
250 /// - MaxVIterations <int> Maximum number of iterations for adaptive volume range
251 /// - InitialScale <float> Initial scale for adaptive volume range
252 /// - GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
253 
254 void TMVA::MethodPDERS::DeclareOptions()
255 {
256  DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
257  AddPreDefVal(TString("Unscaled"));
258  AddPreDefVal(TString("MinMax"));
259  AddPreDefVal(TString("RMS"));
260  AddPreDefVal(TString("Adaptive"));
261  AddPreDefVal(TString("kNN"));
262 
263  DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
264  AddPreDefVal(TString("Box"));
265  AddPreDefVal(TString("Sphere"));
266  AddPreDefVal(TString("Teepee"));
267  AddPreDefVal(TString("Gauss"));
268  AddPreDefVal(TString("Sinc3"));
269  AddPreDefVal(TString("Sinc5"));
270  AddPreDefVal(TString("Sinc7"));
271  AddPreDefVal(TString("Sinc9"));
272  AddPreDefVal(TString("Sinc11"));
273  AddPreDefVal(TString("Lanczos2"));
274  AddPreDefVal(TString("Lanczos3"));
275  AddPreDefVal(TString("Lanczos5"));
276  AddPreDefVal(TString("Lanczos8"));
277  AddPreDefVal(TString("Trim"));
278 
279  DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
280  DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
281  DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
282  DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
283  DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
284  DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
285  DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
286 }
287 
288 ////////////////////////////////////////////////////////////////////////////////
289 /// process the options specified by the user
290 
291 void TMVA::MethodPDERS::ProcessOptions()
292 {
293  if (IgnoreEventsWithNegWeightsInTraining()) {
294  Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
295  << GetMethodTypeName()
296  << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
297  << Endl;
298  }
299 
300  fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
301 
302  fVRangeMode = MethodPDERS::kUnsupported;
303 
304  if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
305  else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
306  else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
307  else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
308  else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
309  else {
310  Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
311  }
312 
313  if (fKernelString == "Box" ) fKernelEstimator = kBox;
314  else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
315  else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
316  else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
317  else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
318  else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
319  else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
320  else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
321  else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
322  else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
323  else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
324  else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
325  else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
326  else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
327  else {
328  Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
329  }
330 
331  // TODO: Add parameter validation
332 
333  Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
334  << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
335  (fVRangeMode == kUnscaled) ? "Unscaled" :
336  (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
337  if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
338  Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
339  else
340  Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
341  << fNEventsMin << " " << fNEventsMax
342  << " " << fMaxVIterations << " " << fInitialScale << Endl;
343  Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
344 }
345 
346 ////////////////////////////////////////////////////////////////////////////////
347 /// this is a dummy training: the preparation work to do is the construction
348 /// of the binary tree as a pointer chain. It is easier to directly save the
349 /// trainingTree in the weight file, and to rebuild the binary tree in the
350 /// test phase from scratch
351 
352 void TMVA::MethodPDERS::Train( void )
353 {
354  if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
355  << "please remove the option from the configuration string, or "
356  << "use \"!Normalise\""
357  << Endl;
358 
359  CreateBinarySearchTree( Types::kTraining );
360 
361  CalcAverages();
362  SetVolumeElement();
363 
364  fInitializedVolumeEle = kTRUE;
365  ExitFromTraining();
366 }
367 
368 ////////////////////////////////////////////////////////////////////////////////
369 /// init the size of a volume element using a defined fraction of the
370 /// volume containing the entire events
371 
372 Double_t TMVA::MethodPDERS::GetMvaValue( Double_t* err, Double_t* errUpper )
373 {
374  if (fInitializedVolumeEle == kFALSE) {
375  fInitializedVolumeEle = kTRUE;
376 
377  // binary trees must exist
378  assert( fBinaryTree );
379 
380  CalcAverages();
381  SetVolumeElement();
382  }
383 
384  // cannot determine error
385  NoErrorCalc(err, errUpper);
386 
387  return this->CRScalc( *GetEvent() );
388 }
389 
390 ////////////////////////////////////////////////////////////////////////////////
391 
392 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
393 {
394  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
395  fRegressionReturnVal->clear();
396  // init the size of a volume element using a defined fraction of the
397  // volume containing the entire events
398  if (fInitializedVolumeEle == kFALSE) {
399  fInitializedVolumeEle = kTRUE;
400 
401  // binary trees must exist
402  assert( fBinaryTree );
403 
404  CalcAverages();
405 
406  SetVolumeElement();
407  }
408 
409  const Event* ev = GetEvent();
410  this->RRScalc( *ev, fRegressionReturnVal );
411 
412  Event * evT = new Event(*ev);
413  UInt_t ivar = 0;
414  for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
415  evT->SetTarget(ivar,(*it));
416  ivar++;
417  }
418 
419  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
420  fRegressionReturnVal->clear();
421 
422  for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
423  fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
424  }
425 
426  delete evT;
427 
428 
429  return (*fRegressionReturnVal);
430 }
431 
432 ////////////////////////////////////////////////////////////////////////////////
433 /// compute also average RMS values required for adaptive Gaussian
434 
435 void TMVA::MethodPDERS::CalcAverages()
436 {
437  if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
438  fAverageRMS.clear();
439  fBinaryTree->CalcStatistics();
440 
441  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
442  if (!DoRegression()){ //why there are separate rms for signal and background?
443  Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
444  Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
445  fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
446  } else {
447  Float_t rms = fBinaryTree->RMS( ivar );
448  fAverageRMS.push_back( rms );
449  }
450  }
451  }
452 }
453 
454 ////////////////////////////////////////////////////////////////////////////////
455 /// create binary search trees for signal and background
456 
457 void TMVA::MethodPDERS::CreateBinarySearchTree( Types::ETreeType type )
458 {
459  if (NULL != fBinaryTree) delete fBinaryTree;
460  fBinaryTree = new BinarySearchTree();
461  if (fNormTree) {
462  fBinaryTree->SetNormalize( kTRUE );
463  }
464 
465  fBinaryTree->Fill( GetEventCollection(type) );
466 
467  if (fNormTree) {
468  fBinaryTree->NormalizeTree();
469  }
470 
471  if (!DoRegression()) {
472  // these are the signal and background scales for the weights
473  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
474  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
475 
476  Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
477  }
478 }
479 
480 ////////////////////////////////////////////////////////////////////////////////
481 /// defines volume dimensions
482 
483 void TMVA::MethodPDERS::SetVolumeElement( void ) {
484  if (GetNvar()==0) {
485  Log() << kFATAL << "GetNvar() == 0" << Endl;
486  return;
487  }
488 
489  // init relative scales
490  fkNNMin = Int_t(fNEventsMin);
491  fkNNMax = Int_t(fNEventsMax);
492 
493  if (fDelta) delete fDelta;
494  if (fShift) delete fShift;
495  fDelta = new std::vector<Float_t>( GetNvar() );
496  fShift = new std::vector<Float_t>( GetNvar() );
497 
498  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
499  switch (fVRangeMode) {
500 
501  case kRMS:
502  case kkNN:
503  case kAdaptive:
504  // sanity check
505  if (fAverageRMS.size() != GetNvar())
506  Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
507  (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
508  Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
509  << "\t]: " << fAverageRMS[ivar]
510  << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
511  << Endl;
512  break;
513  case kMinMax:
514  (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
515  break;
516  case kUnscaled:
517  (*fDelta)[ivar] = fDeltaFrac;
518  break;
519  default:
520  Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
521  << fVRangeMode << Endl;
522  }
523  (*fShift)[ivar] = 0.5; // volume is centered around test value
524  }
525 
526 }
527 
528 ////////////////////////////////////////////////////////////////////////////////
529 /// Interface to RootFinder
530 
531 Double_t TMVA::MethodPDERS::IGetVolumeContentForRoot( Double_t scale )
532 {
533  return ThisPDERS()->GetVolumeContentForRoot( scale );
534 }
535 
536 ////////////////////////////////////////////////////////////////////////////////
537 /// count number of events in rescaled volume
538 
539 Double_t TMVA::MethodPDERS::GetVolumeContentForRoot( Double_t scale )
540 {
541  Volume v( *fHelpVolume );
542  v.ScaleInterval( scale );
543 
544  Double_t count = GetBinaryTree()->SearchVolume( &v );
545 
546  v.Delete();
547  return count;
548 }
549 
550 void TMVA::MethodPDERS::GetSample( const Event& e,
551  std::vector<const BinarySearchTreeNode*>& events,
552  Volume *volume )
553 {
554  Float_t count = 0;
555 
556  // -------------------------------------------------------------------------
557  //
558  // ==== test of volume search =====
559  //
560  // #define TMVA::MethodPDERS__countByHand__Debug__
561 
562 #ifdef TMVA_MethodPDERS__countByHand__Debug__
563 
564  // starting values
565  count = fBinaryTree->SearchVolume( volume );
566 
567  Int_t iS = 0, iB = 0;
568  UInt_t nvar = GetNvar();
569  for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
570  const Event * ev = GetTrainingEvent(ievt_);
571  Bool_t inV;
572  for (Int_t ivar=0; ivar<nvar; ivar++) {
573  Float_t x = ev->GetValue(ivar);
574  inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
575  if (!inV) break;
576  }
577  if (inV) {
578  in++;
579  }
580  }
581  Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
582  Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
583 
584 #endif
585 
586  // -------------------------------------------------------------------------
587 
588  if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
589 
590  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
591  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
592  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
593  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
594  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
595  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
596  }
597  Volume* svolume = new Volume( lb, ub );
598  // starting values
599 
600  fBinaryTree->SearchVolume( svolume, &events );
601  }
602  else if (fVRangeMode == kAdaptive) { // adaptive volume
603 
604  // -----------------------------------------------------------------------
605 
606  // TODO: optimize, perhaps multi stage with broadening limits,
607  // or a different root finding method entirely,
608  if (MethodPDERS_UseFindRoot) {
609 
610  // that won't need to search through large volume, where the bottle neck probably is
611 
612  fHelpVolume = volume;
613 
614  UpdateThis(); // necessary update of static pointer
615  RootFinder rootFinder( this, 0.01, 50, 200, 10 );
616  Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
617 
618  volume->ScaleInterval( scale );
619 
620  fBinaryTree->SearchVolume( volume, &events );
621 
622  fHelpVolume = NULL;
623  }
624  // -----------------------------------------------------------------------
625  else {
626 
627  // starting values
628  count = fBinaryTree->SearchVolume( volume );
629 
630  Float_t nEventsO = count;
631  Int_t i_=0;
632 
633  while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
634  volume->ScaleInterval( 1.15 );
635  count = fBinaryTree->SearchVolume( volume );
636  nEventsO = count;
637  i_++;
638  }
639  if (i_ > 50) Log() << kWARNING << "warning in event: " << e
640  << ": adaptive volume pre-adjustment reached "
641  << ">50 iterations in while loop (" << i_ << ")" << Endl;
642 
643  Float_t nEventsN = nEventsO;
644  Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
645  Float_t scaleO = 1.0;
646  Float_t scaleN = fInitialScale;
647  Float_t scale = scaleN;
648  Float_t scaleBest = scaleN;
649  Float_t nEventsBest = nEventsN;
650 
651  for (Int_t ic=1; ic<fMaxVIterations; ic++) {
652  if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
653 
654  // search for events in rescaled volume
655  Volume* v = new Volume( *volume );
656  v->ScaleInterval( scale );
657  nEventsN = fBinaryTree->SearchVolume( v );
658 
659  // determine next iteration (linear approximation)
660  if (nEventsN > 1 && nEventsN - nEventsO != 0)
661  if (scaleN - scaleO != 0)
662  scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
663  else
664  scale += (-0.01); // should not actually occur...
665  else
666  scale += 0.5; // use much larger volume
667 
668  // save old scale
669  scaleN = scale;
670 
671  // take if better (don't accept it if too small number of events)
672  if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
673  (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
674  nEventsBest = nEventsN;
675  scaleBest = scale;
676  }
677 
678  v->Delete();
679  delete v;
680  }
681  else break;
682  }
683 
684  // last sanity check
685  nEventsN = nEventsBest;
686  // include "1" to cover float precision
687  if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
688  Log() << kWARNING << "warning in event " << e
689  << ": adaptive volume adjustment reached "
690  << "max. #iterations (" << fMaxVIterations << ")"
691  << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
692  << Endl;
693 
694  volume->ScaleInterval( scaleBest );
695  fBinaryTree->SearchVolume( volume, &events );
696  }
697 
698  // end of adaptive method
699 
700  } else if (fVRangeMode == kkNN) {
701  Volume v(*volume);
702 
703  events.clear();
704  // check number of signals in beginning volume
705  Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
706  //if this number is too large return fkNNMax+1
707 
708  Int_t t_times = 0; // number of iterations
709 
710  while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
711  if (kNNcount < fkNNMin) { //if we have too less points
712  Float_t scale = 2; //better scale needed
713  volume->ScaleInterval( scale );
714  }
715  else if (kNNcount > fkNNMax) { //if we have too many points
716  Float_t scale = 0.1; //better scale needed
717  volume->ScaleInterval( scale );
718  }
719  events.clear();
720 
721  v = *volume ;
722 
723  kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
724 
725  t_times++;
726 
727  if (t_times == fMaxVIterations) {
728  Log() << kWARNING << "warning in event" << e
729  << ": kNN volume adjustment reached "
730  << "max. #iterations (" << fMaxVIterations << ")"
731  << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
732  break;
733  }
734  }
735 
736  //vector to normalize distance in each dimension
737  Double_t *dim_normalization = new Double_t[GetNvar()];
738  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
739  dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
740  }
741 
742  std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
743 
744  if (kNNcount >= fkNNMin) {
745  std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
746 
747  //counting the distance for each event
748  for (Int_t j=0;j< Int_t(events.size()) ;j++)
749  (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
750 
751  //counting the fkNNMin-th element
752  std::vector<Double_t>::iterator wsk = distances->begin();
753  for (Int_t j=0;j<fkNNMin-1;++j) ++wsk;
754  std::nth_element( distances->begin(), wsk, distances->end() );
755 
756  //getting all elements that are closer than fkNNMin-th element
757  //signals
758  for (Int_t j=0;j<Int_t(events.size());j++) {
759  Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
760 
761  if (dist <= (*distances)[fkNNMin-1])
762  tempVector.push_back( events[j] );
763  }
764  fMax_distance = (*distances)[fkNNMin-1];
765  delete distances;
766  }
767  delete[] dim_normalization;
768  events = tempVector;
769 
770  } else {
771 
772  // troubles ahead...
773  Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
774  }
775  // -----------------------------------------------------------------------
776 }
777 
778 ////////////////////////////////////////////////////////////////////////////////
779 
780 Double_t TMVA::MethodPDERS::CRScalc( const Event& e )
781 {
782  std::vector<const BinarySearchTreeNode*> events;
783 
784  // computes event weight by counting number of signal and background
785  // events (of reference sample) that are found within given volume
786  // defined by the event
787  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
788  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
789 
790  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
791  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
792  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
793  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
794  }
795 
796  Volume *volume = new Volume( lb, ub );
797 
798  GetSample( e, events, volume );
799  Double_t count = CKernelEstimate( e, events, *volume );
800  delete volume;
801  delete lb;
802  delete ub;
803 
804  return count;
805 }
806 
807 ////////////////////////////////////////////////////////////////////////////////
808 
809 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
810 {
811  std::vector<const BinarySearchTreeNode*> events;
812 
813  // computes event weight by counting number of signal and background
814  // events (of reference sample) that are found within given volume
815  // defined by the event
816  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
817  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
818 
819  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
820  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
821  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
822  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
823  }
824  Volume *volume = new Volume( lb, ub );
825 
826  GetSample( e, events, volume );
827  RKernelEstimate( e, events, *volume, count );
828 
829  delete volume;
830 
831  return;
832 }
833 ////////////////////////////////////////////////////////////////////////////////
834 /// normalization factors so we can work with radius 1 hyperspheres
835 
836 Double_t TMVA::MethodPDERS::CKernelEstimate( const Event & event,
837  std::vector<const BinarySearchTreeNode*>& events, Volume& v )
838 {
839  Double_t *dim_normalization = new Double_t[GetNvar()];
840  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
841  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
842 
843  Double_t pdfSumS = 0;
844  Double_t pdfSumB = 0;
845 
846  // Iteration over sample points
847  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
848 
849  // First switch to the one dimensional distance
850  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
851 
852  // always working within the hyperelipsoid, except for when we don't
853  // note that rejection ratio goes to 1 as nvar goes to infinity
854  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
855 
856  if ( (*iev)->GetClass()==fSignalClass )
857  pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
858  else
859  pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
860  }
861  pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
862  pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
863 
864  delete[] dim_normalization;
865 
866  if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
867  if (pdfSumB < 1e-20) return 1.0;
868  if (pdfSumS < 1e-20) return 0.0;
869 
870  Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
871  return 1.0/(r + 1.0); // TODO: propagate errors from here
872 }
873 
874 ////////////////////////////////////////////////////////////////////////////////
875 /// normalization factors so we can work with radius 1 hyperspheres
876 
877 void TMVA::MethodPDERS::RKernelEstimate( const Event & event,
878  std::vector<const BinarySearchTreeNode*>& events, Volume& v,
879  std::vector<Float_t>* pdfSum )
880 {
881  Double_t *dim_normalization = new Double_t[GetNvar()];
882  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
883  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
884 
885  // std::vector<Float_t> pdfSum;
886  pdfSum->clear();
887  Float_t pdfDiv = 0;
888  fNRegOut = 1; // for now, regression is just for 1 dimension
889 
890  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
891  pdfSum->push_back( 0 );
892 
893  // Iteration over sample points
894  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
895 
896  // First switch to the one dimensional distance
897  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
898 
899  // always working within the hyperelipsoid, except for when we don't
900  // note that rejection ratio goes to 1 as nvar goes to infinity
901  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
902 
903  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
904  pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
905  pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
906  }
907  }
908 
909  delete[] dim_normalization;
910 
911  if (pdfDiv == 0)
912  return;
913 
914  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
915  pdfSum->at(ivar) /= pdfDiv;
916 
917  return;
918 }
919 
920 ////////////////////////////////////////////////////////////////////////////////
921 /// from the normalized euclidean distance calculate the distance
922 /// for a certain kernel
923 
924 Double_t TMVA::MethodPDERS::ApplyKernelFunction (Double_t normalized_distance)
925 {
926  switch (fKernelEstimator) {
927  case kBox:
928  case kSphere:
929  return 1;
930  break;
931  case kTeepee:
932  return (1 - normalized_distance);
933  break;
934  case kGauss:
935  return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
936  break;
937  case kSinc3:
938  case kSinc5:
939  case kSinc7:
940  case kSinc9:
941  case kSinc11: {
942  Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
943  return NormSinc (side_crossings * normalized_distance);
944  }
945  break;
946  case kLanczos2:
947  return LanczosFilter (2, normalized_distance);
948  break;
949  case kLanczos3:
950  return LanczosFilter (3, normalized_distance);
951  break;
952  case kLanczos5:
953  return LanczosFilter (5, normalized_distance);
954  break;
955  case kLanczos8:
956  return LanczosFilter (8, normalized_distance);
957  break;
958  case kTrim: {
959  Double_t x = normalized_distance / fMax_distance;
960  x = 1 - x*x*x;
961  return x*x*x;
962  }
963  break;
964  default:
965  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
966  break;
967  }
968 
969  return 0;
970 }
971 
972 ////////////////////////////////////////////////////////////////////////////////
973 /// Calculating the normalization factor only once (might need a reset at some point.
974 /// Can the method be restarted with different params?)
975 
976 Double_t TMVA::MethodPDERS::KernelNormalization (Double_t pdf)
977 {
978  // Caching jammed to disable function.
979  // It's not really useful after all, badly implemented and untested :-)
980  TTHREAD_TLS(Double_t) ret = 1.0;
981 
982  if (ret != 0.0) return ret*pdf;
983 
984  // We first normalize by the volume of the hypersphere.
985  switch (fKernelEstimator) {
986  case kBox:
987  case kSphere:
988  ret = 1.;
989  break;
990  case kTeepee:
991  ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
992  ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
993  break;
994  case kGauss:
995  // We use full range integral here. Reasonable because of the fast function decay.
996  ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
997  break;
998  case kSinc3:
999  case kSinc5:
1000  case kSinc7:
1001  case kSinc9:
1002  case kSinc11:
1003  case kLanczos2:
1004  case kLanczos3:
1005  case kLanczos5:
1006  case kLanczos8:
1007  // We use the full range integral here. Reasonable because the central lobe dominates it.
1008  ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1009  break;
1010  default:
1011  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1012  }
1013 
1014  // Normalizing by the full volume
1015  ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1016  TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1017 
1018  return ret*pdf;
1019 }
1020 
1021 ////////////////////////////////////////////////////////////////////////////////
1022 /// We use Euclidian metric here. Might not be best or most efficient.
1023 
1024 Double_t TMVA::MethodPDERS::GetNormalizedDistance ( const Event &base_event,
1025  const BinarySearchTreeNode &sample_event,
1026  Double_t *dim_normalization)
1027 {
1028  Double_t ret=0;
1029  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1030  Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1031  ret += dist*dist;
1032  }
1033  // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1034  ret /= GetNvar();
1035  return TMath::Sqrt (ret);
1036 }
1037 
1038 ////////////////////////////////////////////////////////////////////////////////
1039 /// NormSinc
1040 
1041 Double_t TMVA::MethodPDERS::NormSinc (Double_t x)
1042 {
1043  if (x < 10e-10 && x > -10e-10) {
1044  return 1; // Poor man's l'Hopital
1045  }
1046 
1047  Double_t pix = TMath::Pi() * x;
1048  Double_t sinc = TMath::Sin(pix) / pix;
1049  Double_t ret;
1050 
1051  if (GetNvar() % 2)
1052  ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1053  else
1054  ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1055 
1056  return ret;
1057 }
1058 
1059 ////////////////////////////////////////////////////////////////////////////////
1060 /// Lanczos Filter
1061 
1062 Double_t TMVA::MethodPDERS::LanczosFilter (Int_t level, Double_t x)
1063 {
1064  if (x < 10e-10 && x > -10e-10) {
1065  return 1; // Poor man's l'Hopital
1066  }
1067 
1068  Double_t pix = TMath::Pi() * x;
1069  Double_t pixtimesn = pix * ((Double_t) level);
1070  Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1071  Double_t ret;
1072 
1073  if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1074  else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1075 
1076  return ret;
1077 }
1078 
1079 ////////////////////////////////////////////////////////////////////////////////
1080 /// statistical error estimate for RS estimator
1081 
1082 Float_t TMVA::MethodPDERS::GetError( Float_t countS, Float_t countB,
1083  Float_t sumW2S, Float_t sumW2B ) const
1084 {
1085  Float_t c = fScaleB/fScaleS;
1086  Float_t d = countS + c*countB; d *= d;
1087 
1088  if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1089 
1090  Float_t f = c*c/d/d;
1091  Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1092 
1093  if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1094 
1095  return sqrt(err);
1096 }
1097 
1098 ////////////////////////////////////////////////////////////////////////////////
1099 /// write weights to xml file
1100 
1101 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1102 {
1103  void* wght = gTools().AddChild(parent, "Weights");
1104  if (fBinaryTree)
1105  fBinaryTree->AddXMLTo(wght);
1106  else
1107  Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1108  //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1109 }
1110 
1111 ////////////////////////////////////////////////////////////////////////////////
1112 
1113 void TMVA::MethodPDERS::ReadWeightsFromXML( void* wghtnode)
1114 {
1115  if (NULL != fBinaryTree) delete fBinaryTree;
1116  void* treenode = gTools().GetChild(wghtnode);
1117  fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
1118  if(!fBinaryTree)
1119  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1120  if(!fBinaryTree)
1121  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1122  fBinaryTree->SetPeriode( GetNvar() );
1123  fBinaryTree->CalcStatistics();
1124  fBinaryTree->CountNodes();
1125  if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1126  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1127  else fScaleS = 1;
1128  if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1129  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1130  else fScaleB = 1;
1131  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1132  CalcAverages();
1133  SetVolumeElement();
1134  fInitializedVolumeEle = kTRUE;
1135 }
1136 
1137 ////////////////////////////////////////////////////////////////////////////////
1138 /// read weight info from file
1139 
1140 void TMVA::MethodPDERS::ReadWeightsFromStream( std::istream& istr)
1141 {
1142  if (NULL != fBinaryTree) delete fBinaryTree;
1143 
1144  fBinaryTree = new BinarySearchTree();
1145 
1146  istr >> *fBinaryTree;
1147 
1148  fBinaryTree->SetPeriode( GetNvar() );
1149 
1150  fBinaryTree->CalcStatistics();
1151 
1152  fBinaryTree->CountNodes();
1153 
1154  // these are the signal and background scales for the weights
1155  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1156  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1157 
1158  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1159 
1160  CalcAverages();
1161 
1162  SetVolumeElement();
1163 
1164  fInitializedVolumeEle = kTRUE;
1165 }
1166 
1167 ////////////////////////////////////////////////////////////////////////////////
1168 /// write training sample (TTree) to file
1169 
1170 void TMVA::MethodPDERS::WriteWeightsToStream( TFile& ) const
1171 {
1172 }
1173 
1174 ////////////////////////////////////////////////////////////////////////////////
1175 /// read training sample from file
1176 
1177 void TMVA::MethodPDERS::ReadWeightsFromStream( TFile& /*rf*/ )
1178 {
1179 }
1180 
1181 ////////////////////////////////////////////////////////////////////////////////
1182 /// static pointer to this object
1183 
1184 TMVA::MethodPDERS* TMVA::MethodPDERS::ThisPDERS( void )
1185 {
1186  return GetMethodPDERSThreadLocal();
1187 }
1188 ////////////////////////////////////////////////////////////////////////////////
1189 /// update static this pointer
1190 
1191 void TMVA::MethodPDERS::UpdateThis( void )
1192 {
1193  GetMethodPDERSThreadLocal() = this;
1194 }
1195 
1196 ////////////////////////////////////////////////////////////////////////////////
1197 /// write specific classifier response
1198 
1199 void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1200 {
1201  fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1202  fout << "};" << std::endl;
1203 }
1204 
1205 ////////////////////////////////////////////////////////////////////////////////
1206 /// get help message text
1207 ///
1208 /// typical length of text line:
1209 /// "|--------------------------------------------------------------|"
1210 
1211 void TMVA::MethodPDERS::GetHelpMessage() const
1212 {
1213  Log() << Endl;
1214  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1215  Log() << Endl;
1216  Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1217  Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1218  Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1219  Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1220  Log() << "were known, this classifier would exploit the full information" << Endl;
1221  Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1222  Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1223  Log() << "populate the multidimensional phase space. " << Endl;
1224  Log() << Endl;
1225  Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1226  Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1227  Log() << "a weight according to the majority species of the neighboring events." << Endl;
1228  Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1229  Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1230  Log() << Endl;
1231  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1232  Log() << Endl;
1233  Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1234  Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1235  Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1236  Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1237  Log() << Endl;
1238  Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1239  Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1240  Log() << "memory, limits the number of training events that can effectively be " << Endl;
1241  Log() << "used to model the multidimensional PDF." << Endl;
1242  Log() << Endl;
1243  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1244  Log() << Endl;
1245  Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1246  Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1247  Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1248  Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1249  Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1250 }