Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TKDE.h
Go to the documentation of this file.
1 // @(#)root/hist:$Id$
2 // Authors: Bartolomeu Rabacal 07/2010
3 /**********************************************************************
4  * *
5  * Copyright (c) 2006 , LCG ROOT MathLib Team *
6  * *
7  * *
8  **********************************************************************/
9 // Header file for TKDE
10 
11 #ifndef ROOT_TKDE
12 #define ROOT_TKDE
13 
14 #include "Math/WrappedFunction.h"
15 
16 #include "TNamed.h"
17 
18 #include "Math/Math.h"
19 
20 //#include "TF1.h"
21 class TGraphErrors;
22 class TF1;
23 
24 /*
25  Kernel Density Estimation class. The three main references are (1) "Scott DW, Multivariate Density Estimation.
26 Theory, Practice and Visualization. New York: Wiley", (2) "Jann Ben - ETH Zurich, Switzerland -, Univariate kernel density estimation document for KDENS: Stata module for univariate kernel density estimation." and (3) "Hardle W, Muller M, Sperlich S, Werwatz A, Nonparametric and Semiparametric Models. Springer."
27  The algorithm is briefly described in (4) "Cranmer KS, Kernel Estimation in High-Energy
28 Physics. Computer Physics Communications 136:198-207,2001" - e-Print Archive: hep ex/0011057.
29  A binned version is also implemented to address the performance issue due to its data size dependence.
30 */
31 class TKDE : public TNamed {
32 public:
33 
34  enum EKernelType { // Kernel function type option
35  kGaussian,
36  kEpanechnikov,
37  kBiweight,
38  kCosineArch,
39  kUserDefined, // Internal use only for the class's template constructor
40  kTotalKernels // Internal use only for member initialization
41  };
42 
43  enum EIteration { // KDE fitting option
44  kAdaptive,
45  kFixed
46  };
47 
48  enum EMirror { // Data "mirroring" option to address the probability "spill out" boundary effect
49  kNoMirror,
50  kMirrorLeft,
51  kMirrorRight,
52  kMirrorBoth,
53  kMirrorAsymLeft,
54  kMirrorAsymLeftRight,
55  kMirrorAsymRight,
56  kMirrorLeftAsymRight,
57  kMirrorAsymBoth
58  };
59 
60  enum EBinning{ // Data binning option
61  kUnbinned,
62  kRelaxedBinning, // The algorithm is allowed to use binning if the data is large enough
63  kForcedBinning
64  };
65 
66 
67  TKDE(); // defaul constructor used only by I/O
68 
69  TKDE(UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
70  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
71  Instantiate( nullptr, events, data, nullptr, xMin, xMax, option, rho);
72  }
73 
74  TKDE(UInt_t events, const Double_t* data, const Double_t* dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option =
75  "KernelType:Gaussian;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
76  Instantiate( nullptr, events, data, dataWeight, xMin, xMax, option, rho);
77  }
78 
79  template<class KernelFunction>
80  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
81  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, nullptr, xMin, xMax, option, rho);
82  }
83  template<class KernelFunction>
84  TKDE(const Char_t* /*name*/, const KernelFunction& kernfunc, UInt_t events, const Double_t* data, const Double_t * dataWeight, Double_t xMin = 0.0, Double_t xMax = 0.0, const Option_t* option = "KernelType:UserDefined;Iteration:Adaptive;Mirror:noMirror;Binning:RelaxedBinning", Double_t rho = 1.0) {
85  Instantiate(new ROOT::Math::WrappedFunction<const KernelFunction&>(kernfunc), events, data, dataWeight, xMin, xMax, option, rho);
86  }
87 
88  virtual ~TKDE();
89 
90  void Fill(Double_t data);
91  void Fill(Double_t data, Double_t weight);
92  void SetKernelType(EKernelType kern);
93  void SetIteration(EIteration iter);
94  void SetMirror(EMirror mir);
95  void SetBinning(EBinning);
96  void SetNBins(UInt_t nbins);
97  void SetUseBinsNEvents(UInt_t nEvents);
98  void SetTuneFactor(Double_t rho);
99  void SetRange(Double_t xMin, Double_t xMax); // By default computed from the data
100 
101  virtual void Draw(const Option_t* option = "");
102 
103  Double_t operator()(Double_t x) const;
104  Double_t operator()(const Double_t* x, const Double_t* p=0) const; // Needed for creating TF1
105 
106  Double_t GetValue(Double_t x) const { return (*this)(x); }
107  Double_t GetError(Double_t x) const;
108 
109  Double_t GetBias(Double_t x) const;
110  Double_t GetMean() const;
111  Double_t GetSigma() const;
112  Double_t GetRAMISE() const;
113 
114  Double_t GetFixedWeight() const;
115 
116  TF1* GetFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
117  TF1* GetUpperFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
118  TF1* GetLowerFunction(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
119  TF1* GetApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
120  TGraphErrors * GetGraphWithErrors(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
121 
122  // get the drawn object to chanage settings
123  // These objects are managed by TKDE and should not be deleted by the user
124  TF1 * GetDrawnFunction() { return fPDF;}
125  TF1 * GetDrawnUpperFunction() { return fUpperPDF;}
126  TF1 * GetDrawnLowerFunction() { return fLowerPDF;}
127  TGraphErrors * GetDrawnGraph() { return fGraph;}
128 
129  const Double_t * GetAdaptiveWeights() const;
130 
131 
132 private:
133 
134  TKDE(TKDE& kde); // Disallowed copy constructor
135  TKDE operator=(TKDE& kde); // Disallowed assign operator
136 
137  typedef ROOT::Math::IBaseFunctionOneDim* KernelFunction_Ptr;
138  KernelFunction_Ptr fKernelFunction; //! pointer to kernel function
139 
140  class TKernel;
141  friend class TKernel;
142 
143  TKernel* fKernel; //! internal kernel class. Transient because it is recreated after reading from a file
144 
145  std::vector<Double_t> fData; // Data events
146  std::vector<Double_t> fEvents; // Original data storage
147  std::vector<Double_t> fEventWeights; // Original data weights
148 
149  TF1* fPDF; //! Output Kernel Density Estimation PDF function
150  TF1* fUpperPDF; //! Output Kernel Density Estimation upper confidence interval PDF function
151  TF1* fLowerPDF; //! Output Kernel Density Estimation lower confidence interval PDF function
152  TF1* fApproximateBias; //! Output Kernel Density Estimation approximate bias
153  TGraphErrors* fGraph; //! Graph with the errors
154 
155  EKernelType fKernelType;
156  EIteration fIteration;
157  EMirror fMirror;
158  EBinning fBinning;
159 
160 
161  Bool_t fUseMirroring, fMirrorLeft, fMirrorRight, fAsymLeft, fAsymRight;
162  Bool_t fUseBins;
163  Bool_t fNewData; // flag to control when new data are given
164  Bool_t fUseMinMaxFromData; // flag top control if min and max must be used from data
165 
166  UInt_t fNBins; // Number of bins for binned data option
167  UInt_t fNEvents; // Data's number of events
168  Double_t fSumOfCounts; // Data sum of weights
169  UInt_t fUseBinsNEvents; // If the algorithm is allowed to use automatic (relaxed) binning this is the minimum number of events to do so
170 
171  Double_t fMean; // Data mean
172  Double_t fSigma; // Data std deviation
173  Double_t fSigmaRob; // Data std deviation (robust estimation)
174  Double_t fXMin; // Data minimum value
175  Double_t fXMax; // Data maximum value
176  Double_t fRho; // Adjustment factor for sigma
177  Double_t fAdaptiveBandwidthFactor; // Geometric mean of the kernel density estimation from the data for adaptive iteration
178 
179  Double_t fWeightSize; // Caches the weight size
180 
181  std::vector<Double_t> fCanonicalBandwidths;
182  std::vector<Double_t> fKernelSigmas2;
183 
184  std::vector<Double_t> fBinCount; // Number of events per bin for binned data option
185 
186  std::vector<Bool_t> fSettedOptions; // User input options flag
187 
188  struct KernelIntegrand;
189  friend struct KernelIntegrand;
190 
191  void Instantiate(KernelFunction_Ptr kernfunc, UInt_t events, const Double_t* data, const Double_t* weight,
192  Double_t xMin, Double_t xMax, const Option_t* option, Double_t rho);
193 
194  inline Double_t GaussianKernel(Double_t x) const {
195  // Returns the kernel evaluation at x
196  Double_t k2_PI_ROOT_INV = 0.398942280401432703; // (2 * M_PI)**-0.5
197  return (x > -9. && x < 9.) ? k2_PI_ROOT_INV * std::exp(-.5 * x * x) : 0.0;
198  }
199  inline Double_t EpanechnikovKernel(Double_t x) const {
200  return (x > -1. && x < 1.) ? 3. / 4. * (1. - x * x) : 0.0;
201  }
202  inline Double_t BiweightKernel(Double_t x) const {
203  // Returns the kernel evaluation at x
204  return (x > -1. && x < 1.) ? 15. / 16. * (1. - x * x) * (1. - x * x) : 0.0;
205  }
206  inline Double_t CosineArchKernel(Double_t x) const {
207  // Returns the kernel evaluation at x
208  return (x > -1. && x < 1.) ? M_PI_4 * std::cos(M_PI_2 * x) : 0.0;
209  }
210  Double_t UpperConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
211  Double_t LowerConfidenceInterval(const Double_t* x, const Double_t* p) const; // Valid if the bandwidth is small compared to nEvents**1/5
212  Double_t ApproximateBias(const Double_t* x, const Double_t* ) const { return GetBias(*x); }
213  Double_t ComputeKernelL2Norm() const;
214  Double_t ComputeKernelSigma2() const;
215  Double_t ComputeKernelMu() const;
216  Double_t ComputeKernelIntegral() const;
217  Double_t ComputeMidspread() ;
218  void ComputeDataStats() ;
219 
220  UInt_t Index(Double_t x) const;
221 
222  void SetBinCentreData(Double_t xmin, Double_t xmax);
223  void SetBinCountData();
224  void CheckKernelValidity();
225  void SetUserCanonicalBandwidth();
226  void SetUserKernelSigma2();
227  void SetCanonicalBandwidths();
228  void SetKernelSigmas2();
229  void SetHistogram();
230  void SetUseBins();
231  void SetMirror();
232  void SetMean();
233  void SetSigma(Double_t R);
234  void SetKernel();
235  void SetKernelFunction(KernelFunction_Ptr kernfunc = 0);
236  void SetOptions(const Option_t* option, Double_t rho);
237  void CheckOptions(Bool_t isUserDefinedKernel = kFALSE);
238  void GetOptions(std::string optionType, std::string option);
239  void AssureOptions();
240  void SetData(const Double_t* data, const Double_t * weights);
241  void ReInit();
242  void InitFromNewData();
243  void SetMirroredEvents();
244  void SetDrawOptions(const Option_t* option, TString& plotOpt, TString& drawOpt);
245  void DrawErrors(TString& drawOpt);
246  void DrawConfidenceInterval(TString& drawOpt, double cl=0.95);
247 
248  TF1* GetKDEFunction(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
249  TF1* GetKDEApproximateBias(UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
250  // The density to estimate should be at least twice differentiable.
251  TF1* GetPDFUpperConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
252  TF1* GetPDFLowerConfidenceInterval(Double_t confidenceLevel = 0.95, UInt_t npx = 100, Double_t xMin = 1.0, Double_t xMax = 0.0);
253 
254  ClassDef(TKDE, 2) // One dimensional semi-parametric Kernel Density Estimation
255 
256 };
257 
258 #endif